pax_global_header00006660000000000000000000000064146517055650014530gustar00rootroot0000000000000052 comment=d0a1ee18511714a2c33cc0920a068c05c1f0f8fb m4ri-release-20240729/000077500000000000000000000000001465170556500142745ustar00rootroot00000000000000m4ri-release-20240729/.clang-format000066400000000000000000000036451465170556500166570ustar00rootroot00000000000000--- Language: Cpp AccessModifierOffset: -2 AlignAfterOpenBracket: true AlignConsecutiveAssignments: true AlignEscapedNewlinesLeft: false AlignOperands: true AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: true AllowShortCaseLabelsOnASingleLine: true AllowShortIfStatementsOnASingleLine: true AllowShortLoopsOnASingleLine: true AllowShortFunctionsOnASingleLine: All AlwaysBreakAfterDefinitionReturnType: false AlwaysBreakTemplateDeclarations: false AlwaysBreakBeforeMultilineStrings: false BreakBeforeBinaryOperators: None BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false BinPackParameters: true BinPackArguments: true ColumnLimit: 100 ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 DerivePointerAlignment: false ExperimentalAutoDetectBinPacking: false IndentCaseLabels: false IndentWrappedFunctionNames: false IndentFunctionDeclarationAfterType: false MaxEmptyLinesToKeep: 1 KeepEmptyLinesAtTheStartOfBlocks: true NamespaceIndentation: None ObjCBlockIndentWidth: 2 ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: true PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakString: 1000 PenaltyBreakFirstLessLess: 120 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Right SpacesBeforeTrailingComments: 2 Standard: Cpp11 IndentWidth: 2 TabWidth: 8 UseTab: Never BreakBeforeBraces: Attach SpacesInParentheses: false SpacesInSquareBrackets: false SpacesInAngles: false SpaceInEmptyParentheses: false SpacesInCStyleCastParentheses: 
false SpaceAfterCStyleCast: false SpacesInContainerLiterals: true SpaceBeforeAssignmentOperators: true ContinuationIndentWidth: 4 CommentPragmas: '^ IWYU pragma:' ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] SpaceBeforeParens: ControlStatements DisableFormat: false ... m4ri-release-20240729/.github/000077500000000000000000000000001465170556500156345ustar00rootroot00000000000000m4ri-release-20240729/.github/workflows/000077500000000000000000000000001465170556500176715ustar00rootroot00000000000000m4ri-release-20240729/.github/workflows/distcheck.yml000066400000000000000000000012431465170556500223550ustar00rootroot00000000000000name: Distcheck on: push: pull_request: env: JOBS: 2 jobs: linux: runs-on: ubuntu-latest steps: - name: Check out uses: actions/checkout@v2 - name: Install prerequisites run: | sudo DEBIAN_FRONTEND=noninteractive apt-get update sudo DEBIAN_FRONTEND=noninteractive apt-get install autoconf automake libtool - name: Configure run: | autoreconf -i ASAN_OPTIONS=detect_leaks=0 CFLAGS="-fsanitize=address" ./configure --enable-debug - name: Check run: ASAN_OPTIONS=detect_leaks=0 make -j $JOBS check - name: Distcheck run: make -j $JOBS distcheck m4ri-release-20240729/.gitignore000066400000000000000000000015301465170556500162630ustar00rootroot00000000000000.libs *.trs *.lo *.o m4/lt*.m4 Makefile Makefile.in aclocal.m4 autom4te.cache/ compile config.guess config.status config.sub config.log configure depcomp install-sh libm4ri.la libtool ltmain.sh m4/libtool.m4 m4ri.pc missing test-driver test_djb test_colswap test_elimination test_invert test_kernel test_misc test_multiplication test_ple test_random test_smallops test_solve test_transpose test_trsm test_alignment /tests/*.log /bench/bench_elimination /bench/bench_elimination_sparse /bench/bench_invert /bench/bench_multiplication /bench/bench_mzd /bench/bench_ple /bench/bench_rank /bench/bench_trsm /bench/cpucycles-20060326/cpucycles.h /bench/cpucycles-20060326/do.notes /bench/cpucycles.h /*/.deps/ 
/*/.dirstamp /m4ri/config.h /m4ri/config.h.in /m4ri/m4ri_config.h /m4ri/stamp-h1 /bench/bench_m4rm /m4ri/config.h.in~ /compile_commands.json /.cache/ m4ri-release-20240729/AUTHORS000066400000000000000000000024001465170556500153400ustar00rootroot00000000000000 * Tim Abbott: Debian-isation & advice on correct libtool versioning; * Martin Albrecht: maintainer, release manager, peformance tuning (M4RM, M4RI, Strassen, PLE), initial M4RM implementation, parallelisation, PLE factorisation (MMPF algorithm); * Gregory Bard: initial author, M4RI algorithm and initial implementation; * Marco Bodrato: new Strassen-like sequence for matrix multiplication and squaring which improves performance for squaring; * Michael Brickenstein: PolyBoRi author, standard conformity contributions for ANSIC, test data, discussion/suggestion of performance improvements, fast vector-matrix products; * Alexander Dreyer: PolyBoRi author, standard conformity contributions for ANSIC; * Jean-Guillaume Dumas: linear system resolution; * William Hart: many performance improvements for matrix multiplication and in general; * David Harvey: parallel parity function used in classical multiplication; * David Kirkby: portability issues (Solaris, HP Unix); * Clément Pernet: PLS factorisation, triangular system solving (TRSM); * Wael Said: test cases, feedback; * Carlo Wood: bit-level optimisation (transpose, column swaps), refactoring, benchmark(et)ing framework, test code, build system clean-up;m4ri-release-20240729/COPYING000066400000000000000000000431301465170556500153300ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. 
By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. 
Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. 
You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. 
But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. 
For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. 
Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. 
Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. 
Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. 
Copyright (C) 19yy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) 19yy name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. 
If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. m4ri-release-20240729/ChangeLog000066400000000000000000000000001465170556500160340ustar00rootroot00000000000000m4ri-release-20240729/Makefile.am000066400000000000000000000026371465170556500163400ustar00rootroot00000000000000AUTOMAKE_OPTIONS = foreign subdir-objects ACLOCAL_AMFLAGS = -I m4 AM_CFLAGS=${SIMD_FLAGS} ${OPENMP_CFLAGS} ${DEBUG_FLAGS} ${LIBPNG_CFLAGS} lib_LTLIBRARIES = libm4ri.la libm4ri_la_SOURCES = \ m4ri/brilliantrussian.c \ m4ri/misc.c \ m4ri/mzd.c \ m4ri/graycode.c \ m4ri/strassen.c \ m4ri/mzp.c \ m4ri/triangular.c \ m4ri/triangular_russian.c \ m4ri/ple.c \ m4ri/ple_russian.c \ m4ri/solve.c \ m4ri/echelonform.c \ m4ri/mmc.c \ m4ri/debug_dump.c \ m4ri/io.c \ m4ri/djb.c \ m4ri/mp.c BUILT_SOURCES = m4ri/m4ri_config.h pkgincludesubdir = $(includedir)/m4ri pkgincludesub_HEADERS = m4ri/m4ri.h \ m4ri/brilliantrussian.h \ m4ri/misc.h \ m4ri/mzd.h \ m4ri/graycode.h \ m4ri/strassen.h \ m4ri/parity.h \ m4ri/mzp.h \ m4ri/triangular.h \ m4ri/triangular_russian.h \ m4ri/ple.h \ m4ri/ple_russian.h \ m4ri/ple_russian_template.h \ m4ri/solve.h \ m4ri/echelonform.h \ m4ri/xor.h \ m4ri/xor_template.h \ m4ri/mmc.h \ m4ri/debug_dump.h \ m4ri/io.h \ m4ri/djb.h \ m4ri/mp.h nodist_pkgincludesub_HEADERS = m4ri/m4ri_config.h EXTRA_DIST=m4ri/Doxyfile pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = m4ri.pc libm4ri_la_LDFLAGS = -release 0.0.$(RELEASE) -no-undefined libm4ri_la_LIBADD = $(LIBPNG_LIBADD) $(LIBM) SUBDIRS = . 
tests DIST_SUBDIRS = $(SUBDIRS) bench CLANGFORMAT ?= clang-format .PHONY: check-style check-style: $(CLANGFORMAT) -i --style=file m4ri/*.{c,h} tests/*.{c,h} bench/*.{c,h} m4ri-release-20240729/NEWS000066400000000000000000000000001465170556500147610ustar00rootroot00000000000000m4ri-release-20240729/README.md000066400000000000000000000450301465170556500155550ustar00rootroot00000000000000M4RI is a library for fast arithmetic with dense matrices over F2. The name M4RI comes from the first implemented algorithm: The “Method of the Four Russians” inversion algorithm published by Gregory Bard. This algorithm in turn is named after the “Method of the Four Russians” multiplication algorithm which is probably better referred to as Kronrod's method. M4RI is available under the General Public License Version 2 or later (GPLv2+). # Main Features # * basic arithmetic with dense matrices over F2 (addition, equality testing, stacking, augmenting, sub-matrices, randomisation); * asymptotically fast $O(n^{log_2 7})$ matrix multiplication via the Method of the Four Russians (M4RM) & Strassen-Winograd algorithm; * asymptotically fast $O(n^{log_2 7})$ PLE factorisation (Gaussian elimination, system solving, …); * fast row echelon form computation and matrix inversion via the Method of the Four Russians (M4RI, $O(n^{3/log n})$); * asymptotically fast Triangular System solving with Matrices (upper left, lower left, upper right, lower right), * support for the x86/x86_64 SSE2 instruction set where available; * preliminary support for parallelisation on shared memory systems via OpenMP; * and support for Linux, Solaris, and OS X (GCC). See [Further Reading](https://bitbucket.org/malb/m4ri/wiki/Further%20Reading) for implemented algorithms. # Performance See [Performance](https://bitbucket.org/malb/m4ri/wiki/Performance). # OpenMP Support # OpenMP support for parallel multiplication and elimination is enabled with the --enable-openmp configure switch. 
# Install # If you downloaded M4RI by cloning the mainline tree at https://bitbucket.org/malb/m4ri you need to first run the following command: autoreconf --install Then do the usual ./configure make make check For details see the instructions in the file `INSTALL`. # Documentation # To build the reference manual, ensure that you have Doxygen installed. The HTML version of the reference manual can be built as follows: cd src/ doxygen The built documentation is contained under the doc subdirectory of m4ri/. Once the HTML version is built, you can build the PDF version as follows: cd doc/latex/ make The documentation is also available [here](http://malb.bitbucket.io/m4ri/). # Contributors At least the following people have contributed to the M4RI library. * **[Tim Abbott](http://web.mit.edu/tabbott/www/)**: Debian-isation & advice on correct libtool versioning; * **[Martin Albrecht](http://martinralbrecht.wordpress.com)**: maintainer, release manager, peformance tuning (M4RM, M4RI, Strassen, PLE), initial M4RM implementation, parallelisation, PLE factorisation (MMPF algorithm); * **[Gregory Bard](http://www.math.umd.edu/~bardg/)**: initial author, M4RI algorithm and initial implementation; * **[Marco Bodrato](http://bodrato.it/)**: new [Strassen-like sequence](http://bodrato.it/software/strassen.html) for matrix multiplication and squaring which improves performance for squaring; * **[Michael Brickenstein](http://www.mfo.de/organisation/institute/brickenstein/)**: [PolyBoRi](http://polybori.sourceforge.net) author, standard conformity contributions for ANSIC, test data, discussion/suggestion of performance improvements, fast vector-matrix products; * **[Alexander Dreyer](http://www.itwm.fhg.de/en/as__asemployees__dreyer/dreyer/)**: [PolyBoRi](http://polybori.sourceforge.net) author, standard conformity contributions for ANSIC; * **[Jean-Guillaume Dumas](http://ljk.imag.fr/membres/Jean-Guillaume.Dumas/)**: linear system resolution; * **[William 
Hart](http://www.warwick.ac.uk/~masfaw/)**: many performance improvements for matrix multiplication and in general; * **[David Harvey](http://cims.nyu.edu/~harvey/)**: parallel parity function used in classical multiplication; * **Jerry James**: bug fixes, dealing with compiler warnings, Fedora Linux packaging; * **David Kirkby**: portability issues (Solaris, HP Unix); * **[Clément Pernet](http://www.math.washington.edu/~pernet/)**: PLE factorisation, triangular system solving (TRSM); * **Wael Said**: test cases, feedback; * **Carlo Wood**: bit-level optimisation (transpose, column swaps), refactoring, benchmark(et)ing framework, test code, build system clean-up; We are grateful to **[William Stein](http://modular.math.washington.edu/)** for providing our hosting and general infrastructure in the past. # Citing M4RI If you use our libraries in a non-trivial part of your research please consider citing them as follows: @manual{M4RI, key = "M4RI", author = "Martin Albrecht and Gregory Bard", organization = "The M4RI~Team", title = "{The M4RI Library -- Version **version**}", year = **year**, url = "\url{https://bitbucket.org/malb/m4ri}", } and cite the appropriate publications mentioned in [Further Reading](https://bitbucket.org/malb/m4ri/wiki/Further%20Reading). # Contact Please contact our [mailinglist](http://groups.google.com/group/m4ri-devel) if there are bugs, questions, comments. # History * **2020/01/25** A new version of M4RI is available with a few bugfixes. It is available at https://bitbucket.org/malb/m4ri/downloads. * **2020/01/15** A new version of M4RI is available with a few small build system tweaks. It is available at https://bitbucket.org/malb/m4ri/downloads. * **2015/04/17** Our hosting for http://m4ri.sagemath.org at University of Washington. is discontinued and we’re moving everything over to https://bitbucket.org/malb/m4ri. A copy of the old website (except for large files) is available at http://malb.bitbucket.io/m4ri-e-website-2008-2015/. 
* **2014/09/14** A new version of M4RI and M4RIE is available for [download](https://bitbucket.org/malb/m4ri/downloads). The biggest change is that `A->offset` was dropped. Also, various small (multicore) performance improvements were implemented. The update for M4RIE is to maintain compatibility with M4RI. A few improvements were implemented for the mzd_poly module as well. * **2013/04/16** A new version of M4RI is available for [download](https://bitbucket.org/malb/m4ri/downloads). A detailed changlog is available [here](https://bitbucket.org/malb/m4ri/wiki/M4RI-20130416) for M4RI. * **2012/12/21** A new version of M4RI is available for [download](https://bitbucket.org/malb/m4ri/downloads). A detailed changlog is available [here](https://bitbucket.org/malb/m4ri/wiki/M4RI-20121224) for M4RI. See also this [blog post](https://martinralbrecht.wordpress.com/2012/12/21/m4ri-20121224/) for details. * **2012/06/13** New versions of both M4RI and M4RIE are available for [download](https://bitbucket.org/malb/m4ri/downloads). A detailed changlog are available [here](https://bitbucket.org/malb/m4ri/wiki/M4RI-20120613) for M4RI. * **2012/04/13** New versions of both M4RI and M4RIE are available for [download](https://bitbucket.org/malb/m4ri/downloads). Detailed changlogs are available [here](https://bitbucket.org/malb/m4ri/wiki/M4RI-20120415) for M4RI and [here](https://bitbucket.org/malb/m4rie/wiki/M4RIE-20120415) for M4RIE. * **2011/12/04** New versions of both M4RI and M4RIE are available for [download](https://bitbucket.org/malb/m4ri/downloads). The highlight of this version for M4RI is support for reading and writing 1-bit PNG images. The highlight of this release of M4RIE is much improved performance for $4 < e \leq 8$. Detailed changlogs are available [here](https://bitbucket.org/malb/m4ri/wiki/M4RI-20111203) for M4RI and [here](https://bitbucket.org/malb/m4rie/wiki/M4RIE-20111203) for M4RIE. 
* **2011/11/30** A [technical report](http://arxiv.org/abs/1111.6900) by Martin R. Albrecht is available describing the M4RIE library. In particular, Newton-John tables are introduced and our implementation of Karatsuba based matrix-matrix multiplication is described: > **The M4RIE library for dense linear algebra over small fields with even characteristic** > > *Abstract:* In this work, we present the M4RIE library which implements efficient algorithms for > linear algebra with dense matrices over GF(2^e) for 2 ≤ e ≤ 10. As the name of the library > indicates, it makes heavy use of the M4RI library both directly (i.e., by calling it) and > indirectly (i.e., by using its concepts). We provide an open-source GPLv2+ C library for > efficient linear algebra over GF(2^e) for e small. In this library we implemented an idea due to > Bradshaw and Boothby which reduces matrix multiplication over GF(p^k) to a series of matrix > multiplications over GF(p). Furthermore, we propose a caching technique - Newton-John tables - > to avoid finite field multiplications which is inspired by Kronrod's method ("M4RM") for matrix > multiplication over GF(2). Using these two techniques we provide asymptotically fast triangular > solving with matrices (TRSM) and PLE-based Gaussian elimination. As a result, we are able to > significantly improve upon the state of the art in dense linear algebra over GF(2^e) with 2 ≤ e > ≤ 10. * **2011/11/29** A [technical report](http://arxiv.org/abs/1111.6549) by Martin R. Albrecht, Gregory Bard and Clément Pernet is available describing the Gaussian elimination machinery (PLE decomposition) in the M4RI library: > **Efficient Dense Gaussian Elimination over the Finite Field with Two Elements.** > > *Abstract:* In this work we describe an efficient implementation of a hierarchy of algorithms > for Gaussian elimination upon dense matrices over the field with two elements.
We discuss both > well-known and new algorithms as well as our implementations in the M4RI library, which has been > adopted into Sage. The focus of our discussion is a block iterative algorithm for PLE > decomposition which is inspired by the M4RI algorithm. The implementation presented in this work > provides considerable performance gains in practice when compared to the previously fastest > implementation. We provide performance figures on x86_64 CPUs to demonstrate the alacrity of our > approach. * **2011/10/10** A new release of M4RI is available for [download](https://bitbucket.org/malb/m4ri/downloads/m4ri-20111004.tar.gz). See the [release notes](https://bitbucket.org/malb/m4ri/wiki/M4RI-20111004) for the list of changes. Also, a new release of M4RIE is also available for [download](https://bitbucket.org/malb/m4ri/downloads/m4rie-20111004.tar.gz). See the [release notes](https://bitbucket.org/malb/m4rie/wiki/M4RIE-20111004) for the list of changes. * **2011/07/14** A new release of M4RI is available for [download](https://bitbucket.org/malb/m4ri/downloads/m4ri-20110715.tar.gz). See the [release notes](https://bitbucket.org/malb/m4ri/wiki/M4RI-20110715) for the list of changes. Also, a new release of M4RIE is also available for [download](https://bitbucket.org/malb/m4ri/downloads/m4rie-20110715.tar.gz). M4RIE now relies on M4RI for cache size and other hardware feature detection. * **2011/06/10** A new release of M4RI is available for [download](https://bitbucket.org/malb/m4ri/downloads/m4ri-20110613.tar.gz). This version fixes various issues when M4RI is built with OpenMP enabled. * **2011/06/01** A new release of M4RI is available for [download.](https://bitbucket.org/malb/m4ri/downloads/m4ri-20110601.tar.gz) See the [release notes](https://bitbucket.org/malb/m4ri/wiki/M4RI-20110601) for the list of changes. Also, a new release of M4RIE is also available for [download](https://bitbucket.org/malb/m4ri/downloads/m4rie-20110601.tar.gz). 
The only changes to M4RIE are to ensure compatibility with M4RI version 20110601 and up. * **2011/04/13** We now have a [mailing list](http://groups.google.com/group/m4ri-devel). * **2010/08/14** A new release of M4RI is available for [download.](https://bitbucket.org/malb/m4ri/downloads/m4ri-20100817.tar.gz) The main changes are improved automatic cache size detection and some clean ups necessary for M4RIE. A first official release of M4RIE is also available for [download](https://bitbucket.org/malb/m4ri/downloads/m4rie-20100817.tar.gz). * **2010/07/13** A new [release](https://bitbucket.org/malb/m4ri/downloads/m4ri-20100701.tar.gz) is available for download. See the [release notes](http://www.bitbucket.org/malb/m4ri/wiki/M4RI-20100701) for details. * **2009/11/04** A new [release](https://bitbucket.org/malb/m4ri/downloads/m4ri-20091101.tar.gz) is available for download. See the [release notes](http://www.bitbucket.org/malb/m4ri/wiki/M4RI-20091101) for details. * **2009/04/09** A new [release](https://bitbucket.org/malb/m4ri/downloads/m4ri-20090409.tar.gz) is available for download. It heavily breaks backward compatibility but supports much bigger matrices than before. See the [release notes](http://www.bitbucket.org/malb/m4ri/wiki/M4RI-20090409) for details. * **2009/01/05** A new [release](https://bitbucket.org/malb/m4ri/downloads/m4ri-20090105.tar.gz) is available for download. It contains new features, performance enhancements and bug fixes. [Release notes](http://www.bitbucket.org/malb/m4ri/wiki/M4RI-20090105) are available in the wiki. * **2008/11/12** A paper describing our matrix multiplication implementation is available as [pre-print](http://arxiv.org/abs/0811.1714) on the ArXiv. Also, M4RI is being [packaged](https://bugzilla.redhat.com/show_bug.cgi?id=470173) for Fedora Core. Finally, we updated the [performance](./performance.html) data for GAP and Magma on the Core 2 Duo with improved timings.
* **2008/10/28** A new [release](https://bitbucket.org/malb/m4ri/downloads/m4ri-20081029.tar.gz) is available for download. It contains mainly bugfixes but starting with this release triangular solving with matrices (TRSM) is fully supported. Also LUP factorisation (i.e. on full rank matrices) seems to be working now but it is not optimised at all. * **2008/10/22** The [slides](http://www.informatik.uni-bremen.de/~malb/talks/20081010%20-%20M4RI%20-%20Nancy.pdf) for the [Sage Days 10](http://wiki.sagemath.org/days10) talk about matrix multiplication in the M4RI library are available online. * **2008/09/22** A new [release](https://bitbucket.org/malb/m4ri/downloads/m4ri-20080909.tar.gz) is available. It is identical to the version of M4RI shipped with [Sage](http://www.sagemath.org) 3.1.2 and contains many build fixes for a wide range of platforms. Sage (and thus M4RI) supports x86 Linux, x86_64 Linux, ia64 Linux, x86 OSX and ppc OSX. M4RI also supports Windows and Solaris 10. * **2008/08/26** This [release](https://bitbucket.org/malb/m4ri/downloads/m4ri-20080826.tar.gz) is a pure bugfix release. Before this bugfix, if the input matrices were very non-square either wrong results or SIGSEGVs could be observed. * **2008/08/21** A new [release](https://bitbucket.org/malb/m4ri/downloads/m4ri-20080821.tar.gz) is available. This release contains Clément Pernet's latest LQUP and TRSM development code. LQUP still lacks a basecase but TRSM should be fairly complete. No attempts were made so far to optimise things. Furthermore, this release contains an improved strategy for choosing $k$ in M4RM which improves performance on the Core2Duo. * **2008/08/17** A new [release](https://bitbucket.org/malb/m4ri/downloads/m4ri-20080817.tar.gz) is available. This release adds a simple memory manager for systems with slow malloc/free syscalls. Also, the initialisation (m4ri_init) and finalisation (m4ri_fini) routines are now called automatically when the library is loaded/unloaded.
This is tested with GCC and SunCC but not with MSVC. Matrix elimination got slightly [faster](./performance.html) across platforms, multi-core support was extended to elimination and improved for multiplication. The README contains instructions on how to enable multi-core support. This release does not contain Clément Pernet's latest LQUP patch. * **2008/06/24** A new [release](https://bitbucket.org/malb/m4ri/downloads/m4ri-20080624.tar.gz) is available. This release uses the libtool [-release mechanism](http://www.gnu.org/software/libtool/manual.html#Versioning) to ensure binary (in)compatibility between releases since - again - the API changed: since the project is quite young do not expect the API to be stable anytime soon. Also the new version [attempts to detect](http://autoconf-archive.cryp.to/ax_cache_size.html) the L1 and L2 cache sizes and uses a Strassen-Winograd cutoff by default such that both source matrices fit in L2 (this is not optimal but a good compromise). This new version has some scratch/experimental code which is the beginning of asymptotically fast LQUP factorisation. Finally, elimination got slightly faster. * **2008/06/20** Thanks to [Tim Abbott](http://web.mit.edu/tabbott/www/) libM4RI is now in Debian/unstable. * **2008/06/13** It turns out our [comparison with Magma](./performance.html) on the Core2Duo was strongly biased, since we compared with a version of Magma that was optimised for AMD64 rather than Intel64\. The correct times are given now and we apologise for this mix-up. * **2008/06/03** This release is a small bugfix release. Matrices are now printed correctly and a bug in mzd_gauss_delayed was reported and fixed by Wael Said and Mohammed Saied. * **2008/06/01** This release greatly improves the performance of M4RI: the reduction of a given matrix to (reduced) row echelon form. The speed-up over the last release can be as much as ten, we will provide performance data for this in the near future.
However, the new implementation still isn't asymptotically fast. Also mzd_transpose is much faster now due to improved data locality. * **2008/05/21** Today's release fixes a severe bug found by Bill Hart, disables SSE2 on all CPUs except those manufactured by Intel (for performance reasons), improves [performance](./performance.html) on the Core2Duo and introduces a configure switch to enable OpenMP support. * **2008/05/20** A new release is available with [massive speed improvements](./performance.html) for matrix multiplication. These improvements were discussed and tested in [this thread](http://groups.google.com/group/sage-devel/browse_thread/thread/aa4edc241ca4d6bb) on the [sage-devel](http://groups.google.com/group/sage-devel) mailing list. This release has also experimental and preliminary support for [OpenMP](http://www.openmp.org). To activate it compile with GCC 4.2 and `CFLAGS="-fopenmp -DHAVE_OPENMP"`. Note however, that this release is still a developer preview since some automatic tuning is still not implemented, the performance on the Opteron isn't acceptable yet, and the parallel implementation is naive. * **2008/05/16** Release early, release often. This release fixes the unconditional use of _mm_free even when it is not available. * **2008/05/15** A new minor release is available which improves performance on Opterons. Also, the website has moved to [http://m4ri.sagemath.org](http://m4ri.sagemath.org/). m4ri-release-20240729/bench/000077500000000000000000000000001465170556500153535ustar00rootroot00000000000000m4ri-release-20240729/bench/Makefile.am000066400000000000000000000045641465170556500174200ustar00rootroot00000000000000# TOPSRCDIR allows to compile this testsuite for a clone (and different revision) of this # repository.
If the environment variable topsrcdir is the top source directory of the clone, # compile this testsuite as: # # make TOPSRCDIR="$topsrcdir" # # Finally, if you also leave out TOPSRCDIR and just run 'make' then the testsuite # is compiled against the current source tree. AUTOMAKE_OPTIONS = foreign TOPSRCDIR = $(srcdir)/.. TOPBUILDDIR = $(builddir)/.. DEFINES = # include TOPBUILDDIR for m4ri_config.h AM_CFLAGS = -I$(TOPSRCDIR) -I$(TOPBUILDDIR) -D_XOPEN_SOURCE=600 @CFLAGS@ $(DEFINES) @OPENMP_CFLAGS@ @PAPI_CFLAGS@ @LIBPNG_CFLAGS@ STAGEDIR := $(realpath -s $(TOPBUILDDIR)/.libs) AM_LDFLAGS = -L$(STAGEDIR) -Wl,-rpath,$(STAGEDIR) cpucycles.o -lm4ri -lm @PAPI_LDFLAGS@ @PAPI_LIBS@ LDFLAGS = -no-install BENCH = bench_elimination \ bench_multiplication \ bench_m4rm \ bench_ple \ bench_trsm \ bench_elimination_sparse \ bench_mzd \ bench_invert \ bench_rank CPUCYCLES_DIR = cpucycles-20060326 CPUCYCLES_EXTRA_DIST = $(srcdir)/cpucycles-20060326/*.c \ $(srcdir)/cpucycles-20060326/*.h \ $(srcdir)/cpucycles-20060326/compile \ $(srcdir)/cpucycles-20060326/do EXTRA_DIST = $(CPUCYCLES_EXTRA_DIST) bin_PROGRAMS = $(BENCH) bench_elimination_SOURCES = bench_elimination.c benchmarking.c benchmarking.h bench_multiplication_SOURCES = bench_multiplication.c benchmarking.c benchmarking.h bench_m4rm_SOURCES = bench_m4rm.c benchmarking.c benchmarking.h bench_ple_SOURCES = bench_ple.c benchmarking.c benchmarking.h bench_trsm_SOURCES = bench_trsm.c benchmarking.c benchmarking.h bench_elimination_sparse_SOURCES = bench_elimination_sparse.c benchmarking.c benchmarking.h bench_mzd_SOURCES = bench_mzd.c benchmarking.c benchmarking.h bench_invert_SOURCES = bench_invert.c benchmarking.c benchmarking.h bench_rank_SOURCES = bench_rank.c benchmarking.c benchmarking.h BUILT_SOURCES = cpucycles.h cpucycles.h: cpucycles.o cpucycles.o: (if [ $(srcdir) != $(builddir) ]; then \ cp -r $(srcdir)/$(CPUCYCLES_DIR) $(builddir); \ chmod +w -R $(CPUCYCLES_DIR)/; \ fi; \ cd $(CPUCYCLES_DIR); \ sh do; \ cp cpucycles.o ..;
\ cp cpucycles.h ..; \ cd ..; \ if [ $(srcdir) != $(builddir) ]; then \ rm -r $(CPUCYCLES_DIR); \ fi) distclean-local: -rm -f cpucycles.h m4ri-release-20240729/bench/bench_elimination.c000066400000000000000000000145311465170556500211720ustar00rootroot00000000000000#include #include "benchmarking.h" #include "cpucycles.h" #include #include #ifdef HAVE_LIBPAPI #define _GNU_SOURCE #include #include #include // papi.h needs caddr_t #endif struct elim_params { rci_t m; rci_t n; rci_t r; char const *algorithm; }; static unsigned long long loop_calibration[32]; int run_nothing(void *_p, unsigned long long *data, int *data_len) { struct elim_params *p = (struct elim_params *)_p; mzd_t *A = mzd_init(p->m, p->n); if (p->r != 0) { mzd_t *L, *U; L = mzd_init(p->m, p->m); U = mzd_init(p->m, p->n); mzd_randomize(U); mzd_randomize(L); for (rci_t i = 0; i < p->m; ++i) { for (rci_t j = i + 1; j < p->m; j += m4ri_radix) { int const length = MIN(m4ri_radix, p->m - j); mzd_clear_bits(L, i, j, length); } mzd_write_bit(L, i, i, 1); for (rci_t j = 0; j < i; j += m4ri_radix) { int const length = MIN(m4ri_radix, i - j); mzd_clear_bits(U, i, j, length); } if (i < p->r) { mzd_write_bit(U, i, i, 1); } else { for (rci_t j = i; j < p->n; j += m4ri_radix) { int const length = MIN(m4ri_radix, p->n - i); mzd_clear_bits(U, i, j, length); } } } mzd_mul(A, L, U, 0); mzd_free(L); mzd_free(U); } else { mzd_randomize(A); } #ifndef HAVE_LIBPAPI *data_len = 2; #else *data_len = MIN(papi_array_len + 1, *data_len); #endif int papi_res; #ifndef HAVE_LIBPAPI data[0] = walltime(0); data[1] = cpucycles(); #else int array_len = *data_len - 1; unsigned long long t0 = PAPI_get_virt_usec(); papi_res = PAPI_start_counters((int *)papi_events, array_len); if (papi_res) m4ri_die(""); #endif #ifndef HAVE_LIBPAPI data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); #else PAPI_stop_counters((long long *)&data[1], array_len); t0 = PAPI_get_virt_usec() - t0; data[0] = t0; for (int nv = 0; nv <= array_len; ++nv) { 
if (data[nv] < loop_calibration[nv]) loop_calibration[nv] = data[nv]; } #endif mzd_free(A); return (0); } int run(void *_p, unsigned long long *data, int *data_len) { struct elim_params *p = (struct elim_params *)_p; #ifndef HAVE_LIBPAPI *data_len = 2; #else *data_len = MIN(papi_array_len + 1, *data_len); #endif int papi_res; mzd_t *A = mzd_init(p->m, p->n); if (p->r != 0) { mzd_t *L, *U; L = mzd_init(p->m, p->m); U = mzd_init(p->m, p->n); mzd_randomize(U); mzd_randomize(L); for (rci_t i = 0; i < p->m; ++i) { for (rci_t j = i + 1; j < p->m; j += m4ri_radix) { int const length = MIN(m4ri_radix, p->m - j); mzd_clear_bits(L, i, j, length); } mzd_write_bit(L, i, i, 1); for (rci_t j = 0; j < i; j += m4ri_radix) { int const length = MIN(m4ri_radix, i - j); mzd_clear_bits(U, i, j, length); } if (i < p->r) { mzd_write_bit(U, i, i, 1); } else { for (rci_t j = i; j < p->n; j += m4ri_radix) { int const length = MIN(m4ri_radix, p->n - i); mzd_clear_bits(U, i, j, length); } } } mzd_mul(A, L, U, 0); mzd_free(L); mzd_free(U); } else { mzd_randomize(A); } #ifndef HAVE_LIBPAPI data[0] = walltime(0); data[1] = cpucycles(); #else int array_len = *data_len - 1; unsigned long long t0 = PAPI_get_virt_usec(); papi_res = PAPI_start_counters((int *)papi_events, array_len); if (papi_res) m4ri_die(""); #endif if (strcmp(p->algorithm, "m4ri") == 0) p->r = mzd_echelonize_m4ri(A, 1, 0); else if (strcmp(p->algorithm, "pluq") == 0) p->r = mzd_echelonize_pluq(A, 1); else if (strcmp(p->algorithm, "mmpf") == 0) p->r = _mzd_pluq_russian(A, mzp_init(A->nrows), mzp_init(A->ncols), 0); else if (strcmp(p->algorithm, "naive") == 0) p->r = mzd_echelonize_naive(A, 1); #ifndef HAVE_LIBPAPI data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); #else PAPI_stop_counters((long long *)&data[1], array_len); t0 = PAPI_get_virt_usec() - t0; data[0] = t0; for (int nv = 0; nv <= array_len; ++nv) { data[nv] -= loop_calibration[nv]; } #endif mzd_free(A); return 0; } void print_help_and_exit() { 
printf("Parameters m(, n, alg, r) expected.\n"); printf(" m -- integer > 0\n"); printf(" n -- integer > 0\n"); printf(" alg -- 'm4ri', 'pluq', 'mmpf' or 'naive' (default: 'pluq')\n"); printf(" r -- target rank >= 0, if 0 then mzd_randomize() is called (default: MIN(m,n))\n"); printf("\n"); bench_print_global_options(stderr); m4ri_die(""); } int main(int argc, char **argv) { int opts = global_options(&argc, &argv); int data_len; #ifdef HAVE_LIBPAPI int papi_counters = PAPI_num_counters(); if (papi_counters < papi_array_len) { fprintf(stderr, "%s: Warning: there are only %d hardware counters available!\n", progname, papi_counters); papi_array_len = papi_counters; } if (papi_test(papi_events, papi_array_len)) exit(1); for (int nv = 0; nv <= papi_array_len; ++nv) loop_calibration[nv] = 100000000; data_len = papi_array_len + 1; #else data_len = 2; #endif if (opts < 0 || argc < 2 || argc > 5) { print_help_and_exit(); } struct elim_params params; params.m = atoi(argv[1]); if (argc >= 3) params.n = atoi(argv[2]); else params.n = params.m; if (argc >= 4) params.algorithm = argv[3]; else params.algorithm = "pluq"; if (argc >= 5) params.r = atoi(argv[4]); else params.r = params.m; srandom(17); unsigned long long data[16]; for (int i = 0; i < 4; ++i) run_nothing((void *)¶ms, data, &data_len); run_bench(run, (void *)¶ms, data, data_len); double cc_per_op = ((double)data[1]) / ((double)params.m * (double)params.n * powl((double)params.r, 0.807)); printf("m: %5d, n: %5d, last r: %5d, cpu cycles: %12llu, cc/(mnr^0.807): %.5lf, ", params.m, params.n, params.r, data[1], cc_per_op); print_wall_time(data[0] / 1000000.0); printf(", "); print_cpu_time(data[1] / (double)cpucycles_persecond()); printf("\n"); #ifdef HAVE_LIBPAPI for (int n = 1; n < data_len; ++n) { double tmp = ((double)data[n]) / powl((double)params.n, 2.807); printf("%20s (%20llu) per bit (divided by n^2.807): %15.5f\n", papi_event_name(papi_events[n - 1]), data[n], tmp); } #endif } 
m4ri-release-20240729/bench/bench_elimination_sparse.c000066400000000000000000000037161465170556500225520ustar00rootroot00000000000000#include #include "benchmarking.h" #include "cpucycles.h" #include #include struct elim_sparse_params { rci_t m; rci_t n; rci_t r; char const *algorithm; long density; int full; }; int run(void *_p, unsigned long long *data, int *data_len) { struct elim_sparse_params *p = (struct elim_sparse_params *)_p; *data_len = 2; mzd_t *A = mzd_init(p->m, p->n); for (rci_t i = 0; i < p->m; ++i) { for (rci_t j = 0; j < p->n; ++j) { if (random() <= p->density) { mzd_write_bit(A, i, j, 1); } } } data[0] = walltime(0); data[1] = cpucycles(); if (strcmp(p->algorithm, "m4ri") == 0) p->r = mzd_echelonize_m4ri(A, p->full, 0); else if (strcmp(p->algorithm, "cross") == 0) p->r = mzd_echelonize(A, p->full); else if (strcmp(p->algorithm, "pluq") == 0) p->r = mzd_echelonize_pluq(A, p->full); else if (strcmp(p->algorithm, "naive") == 0) p->r = mzd_echelonize_naive(A, p->full); data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); mzd_free(A); return 0; } int main(int argc, char **argv) { global_options(&argc, &argv); if (argc < 3) { m4ri_die("Parameters m,n, (alg,density,full) expected.\n"); } struct elim_sparse_params p; p.density = RAND_MAX / 10; // Use a density of 0.1 by default. 
p.full = 1; if (argc >= 4) p.algorithm = argv[3]; else p.algorithm = "m4ri"; if (argc >= 5) p.density = RAND_MAX * strtod(argv[4], NULL); if (argc >= 6) p.full = atoi(argv[5]); p.m = atoi(argv[1]); p.n = atoi(argv[2]); /* put this call in run() to benchmark one particular matrix over and over again instead of computing the average of various matrices.*/ srandom(17); unsigned long long data[2]; run_bench(run, (void *)&p, data, 2); printf("m: %5d, n: %5d, last r: %5d, density: %7.5f, cpu cycles: %10llu, wall time: %lf\n", p.m, p.n, p.r, (double)p.density / RAND_MAX, data[1], data[0] / 1000000.0); } m4ri-release-20240729/bench/bench_invert.c000066400000000000000000000054741465170556500201770ustar00rootroot00000000000000#include #include "benchmarking.h" #include "cpucycles.h" #include #include struct inv_params { rci_t n; int direction; char const *algorithm; }; int run(void *_p, unsigned long long *data, int *data_len) { struct inv_params *p = (struct inv_params *)_p; *data_len = 2; mzd_t *A = NULL, *L = NULL, *U = NULL, *B = NULL; if (p->direction <= 0) { L = mzd_init(p->n, p->n); mzd_randomize(L); for (rci_t i = 0; i < p->n; ++i) { for (rci_t j = i + 1; j < p->n; ++j) mzd_write_bit(L, i, j, 0); mzd_write_bit(L, i, i, 1); } } if (p->direction >= 0) { U = mzd_init(p->n, p->n); mzd_randomize(U); for (rci_t i = 0; i < p->n; ++i) { for (rci_t j = 0; j < i; ++j) mzd_write_bit(U, i, j, 0); mzd_write_bit(U, i, i, 1); } } switch (p->direction) { case 0: A = mzd_mul(NULL, L, U, 0); mzd_free(L); mzd_free(U); break; case -1: A = L; break; case 1: A = U; break; default: m4ri_die("unknown direction '%d'", p->direction); }; data[0] = walltime(0); data[1] = cpucycles(); switch (p->direction) { case 0: if (strcmp(p->algorithm, "m4ri") == 0) B = mzd_inv_m4ri(NULL, A, 0); else m4ri_die("unknown algorithm: '%s'", p->algorithm); break; case 1: if (strcmp(p->algorithm, "m4ri") == 0) { mzd_trtri_upper_russian(A, 0); B = mzd_copy(NULL, A); } else if (strcmp(p->algorithm, "mm") == 0) { 
mzd_trtri_upper(A); B = mzd_copy(NULL, A); } else m4ri_die("unknown algorithm: '%s'", p->algorithm); break; case -1: m4ri_die("not implemented error"); break; } data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); mzd_free(A); mzd_free(B); return 0; } int main(int argc, char **argv) { int opts = global_options(&argc, &argv); if (opts < 0) { bench_print_global_options(stderr); exit(-1); } if (argc != 4) { printf("Parameters n,direction,alg expected.\n"); printf(" n -- integer > 0\n"); printf(" direction -- lower triangular (-1), full (0) or upper triangular (1).\n"); printf(" algorithm -- 'm4ri' or 'mm' (for direction 1)\n"); printf("\n"); bench_print_global_options(stderr); m4ri_die(""); } struct inv_params params; params.n = atoi(argv[1]); params.direction = atoi(argv[2]); params.algorithm = argv[3]; /* put this call in run() to benchmark one particular matrix over and over again instead of computing the average of various matrices.*/ srandom(17); unsigned long long data[2]; run_bench(run, (void *)¶ms, data, 2); double cc_per_op = ((double)data[1]) / powl((double)params.n, 2.807); printf("n: %5d, cpu cycles: %10llu, cc/(n^2.807): %.5lf, wall time: %lf\n", params.n, data[1], cc_per_op, data[0] / 1000000.0); } m4ri-release-20240729/bench/bench_m4rm.c000066400000000000000000000113241465170556500175360ustar00rootroot00000000000000#include #include "benchmarking.h" #include "cpucycles.h" #include #include #ifdef HAVE_LIBPAPI #define _GNU_SOURCE #include #include #include // papi.h needs caddr_t #endif struct mul_params { rci_t m; rci_t n; rci_t l; int k; }; static unsigned long long loop_calibration[32]; int run_nothing(void *_p, unsigned long long *data, int *data_len) { struct mul_params *p = (struct mul_params *)_p; mzd_t *A = mzd_init(p->m, p->n); mzd_t *B = mzd_init(p->n, p->l); mzd_randomize(A); mzd_randomize(B); #ifndef HAVE_LIBPAPI *data_len = 2; #else *data_len = MIN(papi_array_len + 1, *data_len); #endif #ifndef HAVE_LIBPAPI data[0] = walltime(0); 
data[1] = cpucycles(); #else int papi_res; int array_len = *data_len - 1; unsigned long long t0 = PAPI_get_virt_usec(); papi_res = PAPI_start_counters((int *)papi_events, array_len); if (papi_res) m4ri_die(""); #endif #ifndef HAVE_LIBPAPI data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); #else PAPI_stop_counters((long long *)&data[1], array_len); t0 = PAPI_get_virt_usec() - t0; data[0] = t0; for (int nv = 0; nv <= array_len; ++nv) { if (data[nv] < loop_calibration[nv]) loop_calibration[nv] = data[nv]; } #endif mzd_free(A); mzd_free(B); return (0); } int run(void *_p, unsigned long long *data, int *data_len) { struct mul_params *p = (struct mul_params *)_p; #ifndef HAVE_LIBPAPI *data_len = 2; #else *data_len = MIN(papi_array_len + 1, *data_len); #endif mzd_t *A = mzd_init(p->m, p->n); mzd_t *B = mzd_init(p->n, p->l); mzd_t *C = mzd_init(p->m, p->l); mzd_randomize(A); mzd_randomize(B); #ifndef HAVE_LIBPAPI data[0] = walltime(0); data[1] = cpucycles(); #else int papi_res; int array_len = *data_len - 1; unsigned long long t0 = PAPI_get_virt_usec(); papi_res = PAPI_start_counters((int *)papi_events, array_len); if (papi_res) m4ri_die(""); #endif _mzd_mul_m4rm(C, A, B, p->k, 0); #ifndef HAVE_LIBPAPI data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); #else PAPI_stop_counters((long long *)&data[1], array_len); t0 = PAPI_get_virt_usec() - t0; data[0] = t0; for (int nv = 0; nv <= array_len; ++nv) { data[nv] -= loop_calibration[nv]; } #endif mzd_free(A); mzd_free(B); mzd_free(C); return (0); } void print_help_and_exit() { printf("Two options are supported:\n\n"); printf("#SHORT#\n"); printf(" n -- row and column dimensions of A and B, integer > 0\n"); printf(" k -- block size (or 0 for automatic choice)\n"); printf("\n#LONG#\n"); printf(" m -- row dimension of A, integer > 0\n"); printf(" n -- column dimension of A, integer > 0\n"); printf(" l -- column dimension of B, integer > 0\n"); printf(" k -- block size (or 0 for automatic choice)\n"); 
printf("\n"); bench_print_global_options(stderr); m4ri_die(""); } int main(int argc, char **argv) { int opts = global_options(&argc, &argv); int data_len; struct mul_params params; #ifdef HAVE_LIBPAPI int papi_counters = PAPI_num_counters(); if (papi_counters < papi_array_len) { fprintf(stderr, "%s: Warning: there are only %d hardware counters available!\n", progname, papi_counters); papi_array_len = papi_counters; } if (papi_test(papi_events, papi_array_len)) exit(1); for (int nv = 0; nv <= papi_array_len; ++nv) loop_calibration[nv] = 100000000; data_len = papi_array_len + 1; #else data_len = 2; #endif if (opts < 0) { print_help_and_exit(); } switch (argc) { case 3: params.m = atoi(argv[1]); params.n = atoi(argv[1]); params.l = atoi(argv[1]); params.k = atoi(argv[2]); break; case 5: params.m = atoi(argv[1]); params.n = atoi(argv[2]); params.l = atoi(argv[3]); params.k = atoi(argv[4]); break; default: print_help_and_exit(); } if (params.m <= 0 || params.n <= 0 || params.l <= 0) { m4ri_die("Parameters m,n,l must be > 0\n"); } srandom(17); unsigned long long data[16]; for (int i = 0; i < 100; ++i) run_nothing((void *)¶ms, data, &data_len); run_bench(run, (void *)¶ms, data, data_len); double cc_per_op = ((double)data[1]) / powl((double)params.n, 2.807); printf("m: %5d, n: %5d, l: %5d, k: %5d, cpu cycles: %12llu, cc/n^2.807: %.5lf, ", params.m, params.n, params.l, params.k, data[1], cc_per_op); print_wall_time(data[0] / 1000000.0); printf("\n"); #ifdef HAVE_LIBPAPI for (int n = 1; n < data_len; ++n) { double tmp = ((double)data[n]) / powl((double)params.n, 2.807); printf("%20s (%20llu) per bit (divided by n^2.807): %15.5f\n", papi_event_name(papi_events[n - 1]), data[n], tmp); } #endif } m4ri-release-20240729/bench/bench_multiplication.c000066400000000000000000000136311465170556500217170ustar00rootroot00000000000000#include #include "benchmarking.h" #include "cpucycles.h" #include #include #ifdef HAVE_LIBPAPI #define _GNU_SOURCE #include #include #include // papi.h 
needs caddr_t #endif struct mul_params { rci_t m; rci_t n; rci_t l; int cutoff; int mp; }; static unsigned long long loop_calibration[32]; int run_nothing(void *_p, unsigned long long *data, int *data_len) { struct mul_params *p = (struct mul_params *)_p; mzd_t *A = mzd_init(p->m, p->n); mzd_t *B = mzd_init(p->n, p->l); mzd_randomize(A); mzd_randomize(B); #ifndef HAVE_LIBPAPI *data_len = 2; #else *data_len = MIN(papi_array_len + 1, *data_len); #endif int papi_res; #ifndef HAVE_LIBPAPI data[0] = walltime(0); data[1] = cpucycles(); #else int array_len = *data_len - 1; unsigned long long t0 = PAPI_get_virt_usec(); papi_res = PAPI_start_counters((int *)papi_events, array_len); if (papi_res) m4ri_die(""); #endif #ifndef HAVE_LIBPAPI data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); #else PAPI_stop_counters((long long *)&data[1], array_len); t0 = PAPI_get_virt_usec() - t0; data[0] = t0; for (int nv = 0; nv <= array_len; ++nv) { if (data[nv] < loop_calibration[nv]) loop_calibration[nv] = data[nv]; } #endif mzd_free(A); mzd_free(B); return (0); } int run(void *_p, unsigned long long *data, int *data_len) { struct mul_params *p = (struct mul_params *)_p; #ifndef HAVE_LIBPAPI *data_len = 2; #else *data_len = MIN(papi_array_len + 1, *data_len); #endif int papi_res; mzd_t *A = mzd_init(p->m, p->n); mzd_t *B = mzd_init(p->n, p->l); mzd_randomize(A); mzd_randomize(B); #ifndef HAVE_LIBPAPI data[0] = walltime(0); data[1] = cpucycles(); #else int array_len = *data_len - 1; unsigned long long t0 = PAPI_get_virt_usec(); papi_res = PAPI_start_counters((int *)papi_events, array_len); if (papi_res) m4ri_die(""); #endif mzd_t *C; if (p->mp == 0) { C = mzd_mul(NULL, A, B, p->cutoff); } else { #if __M4RI_HAVE_OPENMP C = mzd_mul_mp(NULL, A, B, p->cutoff); #else printf("option mp ignored\n"); C = mzd_mul(NULL, A, B, p->cutoff); #endif } #ifndef HAVE_LIBPAPI data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); #else PAPI_stop_counters((long long *)&data[1], array_len); t0 
= PAPI_get_virt_usec() - t0; data[0] = t0; for (int nv = 0; nv <= array_len; ++nv) { data[nv] -= loop_calibration[nv]; } #endif mzd_free(A); mzd_free(B); mzd_free(C); return (0); } void print_help_and_exit() { printf("Parameters expected.\n"); printf("Two combinations are supported:\n"); printf(" 1. n, cuttoff\n"); printf(" n -- matrix dimension, integer > 0\n"); printf(" cutoff -- integer >= 0 (optional, default: 0).\n\n"); printf(" 2. m, n, l, cuttoff\n"); printf(" m -- row dimension of A, integer > 0\n"); printf(" n -- column dimension of A, integer > 0\n"); printf(" l -- column dimension of B, integer > 0\n"); printf(" cutoff -- integer >= 0 (optional, default: 0).\n\n"); printf(" 3. m, n, l, cuttoff mp\n"); printf(" m -- row dimension of A, integer > 0\n"); printf(" n -- column dimension of A, integer > 0\n"); printf(" l -- column dimension of B, integer > 0\n"); printf(" cutoff -- integer >= 0 (default: 0).\n"); printf(" mp -- use mp implementation (default: 0).\n\n"); printf("\n"); bench_print_global_options(stderr); m4ri_die(""); } int main(int argc, char **argv) { int opts = global_options(&argc, &argv); int data_len; struct mul_params params; #ifdef HAVE_LIBPAPI int papi_counters = PAPI_num_counters(); if (papi_counters < papi_array_len) { fprintf(stderr, "%s: Warning: there are only %d hardware counters available!\n", progname, papi_counters); papi_array_len = papi_counters; } if (papi_test(papi_events, papi_array_len)) exit(1); for (int nv = 0; nv <= papi_array_len; ++nv) loop_calibration[nv] = 100000000; data_len = papi_array_len + 1; #else data_len = 2; #endif if (opts < 0) { print_help_and_exit(); } switch (argc) { case 2: params.m = atoi(argv[1]); params.n = atoi(argv[1]); params.l = atoi(argv[1]); params.cutoff = 0; params.mp = 0; break; case 3: params.m = atoi(argv[1]); params.n = atoi(argv[1]); params.l = atoi(argv[1]); params.cutoff = atoi(argv[2]); params.mp = 0; break; case 4: params.m = atoi(argv[1]); params.n = atoi(argv[2]); params.l = 
atoi(argv[3]); params.cutoff = 0; params.mp = 0; break; case 5: params.m = atoi(argv[1]); params.n = atoi(argv[2]); params.l = atoi(argv[3]); params.cutoff = atoi(argv[4]); params.mp = 0; break; case 6: params.m = atoi(argv[1]); params.n = atoi(argv[2]); params.l = atoi(argv[3]); params.cutoff = atoi(argv[4]); params.mp = atoi(argv[5]); break; default: print_help_and_exit(); } if (params.m <= 0 || params.n <= 0 || params.l <= 0) { m4ri_die("Parameters m,n,l must be > 0\n"); } srandom(17); unsigned long long data[16]; for (int i = 0; i < 100; ++i) run_nothing((void *)¶ms, data, &data_len); run_bench(run, (void *)¶ms, data, data_len); double cc_per_op = ((double)data[1]) / powl((double)params.n, 2.807); printf("m: %5d, n: %5d, l: %5d, cutoff: %5d, cpu cycles: %12llu, cc/n^2.807: %.5lf, ", params.m, params.n, params.l, params.cutoff, data[1], cc_per_op); print_wall_time(data[0] / 1000000.0); printf("\n"); #ifdef HAVE_LIBPAPI for (int n = 1; n < data_len; ++n) { double tmp = ((double)data[n]) / powl((double)params.n, 2.807); printf("%20s (%20llu) per bit (divided by n^2.807): %15.5f\n", papi_event_name(papi_events[n - 1]), data[n], tmp); } #endif } m4ri-release-20240729/bench/bench_mzd.c000066400000000000000000000771271465170556500174660ustar00rootroot00000000000000/* * bench_packedmatrix.c * * Application to test functionality of packedmatrix.c. * * Copyright (C) 2011 Carlo Wood * RSA-1024 0x624ACAD5 1997-01-26 Sign & Encrypt * Fingerprint16 = 32 EC A7 B6 AC DB 65 A6 F6 F6 55 DD 1C DC FF 61 * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS 1 #endif #include #ifdef HAVE_LIBPAPI #define _GNU_SOURCE #include #include #include // papi.h needs caddr_t #endif #include #include #include #include "benchmarking.h" #include "cpucycles.h" #include struct test_params { rci_t m; rci_t n; rci_t k; rci_t l; rci_t row[3]; int rows; rci_t col[3]; int cols; wi_t wrd[3]; int wrds; uint64_t count; int cutoff; int boolean; int integer; char const *funcname; }; typedef int (*run_type)(void *, unsigned long long *, int *); static unsigned long long loop_calibration[32]; #ifdef HAVE_LIBPAPI #define BENCHMARK_PREFIX(mzd_func) \ int run_##mzd_func(void *_p, unsigned long long *data, int *data_len) { \ *data_len = MIN(papi_array_len + 1, *data_len); \ struct test_params *p = (struct test_params *)_p; \ int papi_res; \ do #define TIME_BEGIN(mzd_func_with_ARGS) \ do { \ int array_len = *data_len - 1; \ mzd_func_with_ARGS; \ unsigned long long t0 = PAPI_get_virt_usec(); \ papi_res = PAPI_start_counters((int *)papi_events, array_len) #define TIME_END \ PAPI_stop_counters((long long *)&data[1], array_len); \ t0 = PAPI_get_virt_usec() - t0; \ data[0] = t0; \ for (int nv = 0; nv <= array_len; ++nv) { \ if (data[nv] < loop_calibration[nv]) loop_calibration[nv] = data[nv]; \ data[nv] -= loop_calibration[nv]; \ } \ } \ while (0) #define BENCHMARK_POSTFIX \ while (0) \ ; \ return papi_res; \ } #else // HAVE_LIBPAPI #define BENCHMARK_PREFIX(mzd_func) \ int run_##mzd_func(void *_p, unsigned long long *data, int *data_len) { \ *data_len = 2; \ struct test_params *p = (struct test_params *)_p; \ do #define TIME_BEGIN(mzd_func_with_ARGS) \ do { \ mzd_func_with_ARGS; \ data[0] = walltime(0); \ data[1] = cpucycles() #define TIME_END \ data[1] = cpucycles() - data[1]; \ data[0] = walltime(data[0]); \ } \ while (0) #define 
BENCHMARK_POSTFIX \ while (0) \ ; \ return 0; \ } #endif // HAVE_LIBPAPI #define TIME(mzd_func_with_ARGS) \ TIME_BEGIN(mzd_func_with_ARGS); \ for (uint64_t i = 0; i < loop_count; ++i) { mzd_func_with_ARGS; } \ TIME_END mzd_t *volatile vA; rci_t volatile vrowa; rci_t volatile vcola; rci_t volatile vrowb; rci_t volatile vcolb; wi_t volatile vstartblock; int volatile vn; int volatile vint; word volatile vword; BIT volatile vbit; BENCHMARK_PREFIX(bench_nothing) { mzd_t *const A = mzd_init(64, 64); mzd_randomize(A); uint64_t volatile loop_count = p->count; TIME(); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(_mzd_row_swap) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); vA = A; vrowa = p->row[0]; vrowb = p->row[1]; vstartblock = p->wrd[0]; uint64_t const loop_count = p->count; TIME(_mzd_row_swap(vA, vrowa, vrowb, vstartblock)); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_row_swap) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); rci_t const rowa = p->row[0]; rci_t const rowb = p->row[1]; uint64_t const loop_count = p->count; TIME(mzd_row_swap(A, rowa, rowb)); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_copy_row) { mzd_t *const A = mzd_init(p->m, p->n); mzd_t *const B = mzd_init(p->m, p->n); mzd_randomize(A); rci_t const rowa = p->row[0]; rci_t const rowb = p->row[1]; uint64_t const loop_count = p->count; TIME(mzd_copy_row(B, rowb, A, rowa)); mzd_free(A); mzd_free(B); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_col_swap) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); rci_t const cola = p->col[0]; rci_t const colb = p->col[1]; uint64_t const loop_count = p->count; TIME(mzd_col_swap(A, cola, colb)); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_col_swap_in_rows) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); vA = A; vcola = p->col[0]; vcolb = p->col[1]; vrowa = p->row[0]; vrowb = p->row[1]; uint64_t const loop_count = p->count; TIME(mzd_col_swap_in_rows(vA, vcola, vcolb, vrowa, vrowb)); mzd_free(A); 
} BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_read_bit) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); vA = A; vrowa = p->row[0]; vcola = p->col[0]; uint64_t const loop_count = p->count; TIME(vbit = mzd_read_bit(vA, vrowa, vcola)); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_write_bit) { mzd_t *const A = mzd_init(p->m, p->n); vA = A; vrowa = p->row[0]; vcola = p->col[0]; vbit = 0; uint64_t const loop_count = p->count; TIME(mzd_write_bit(vA, vrowa, vcola, vbit); vbit = !vbit); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_row_add_offset) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); vA = A; vrowa = p->row[0]; vrowb = p->row[1]; vcola = p->col[0]; uint64_t const loop_count = p->count; TIME(mzd_row_add_offset(vA, vrowa, vrowb, vcola)); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_row_add) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); rci_t const rowa = p->row[0]; rci_t const rowb = p->row[1]; uint64_t const loop_count = p->count; TIME(mzd_row_add(A, rowa, rowb)); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_transpose) { mzd_t *const A = mzd_init(p->m, p->n); mzd_t *const B = mzd_init(p->n, p->m); mzd_randomize(A); uint64_t const loop_count = p->count; TIME(mzd_transpose(B, A)); mzd_free(A); mzd_free(B); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_mul_naive) { mzd_t *const A = mzd_init(p->m, p->l); mzd_t *const B = mzd_init(p->l, p->n); mzd_t *const C = mzd_init(p->m, p->n); mzd_randomize(A); mzd_randomize(B); uint64_t const loop_count = p->count; TIME(mzd_mul_naive(C, A, B)); mzd_free(A); mzd_free(B); mzd_free(C); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_addmul_naive) { mzd_t *const A = mzd_init(p->m, p->l); mzd_t *const B = mzd_init(p->l, p->n); mzd_t *const C = mzd_init(p->m, p->n); mzd_randomize(A); mzd_randomize(B); uint64_t const loop_count = p->count; TIME(mzd_addmul_naive(C, A, B)); mzd_free(A); mzd_free(B); mzd_free(C); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(_mzd_mul_naive) { mzd_t *const A = 
mzd_init(p->m, p->l); mzd_t *const B = mzd_init(p->n, p->l); mzd_t *const C = mzd_init(p->m, p->n); mzd_randomize(A); mzd_randomize(B); int const clear = p->boolean; uint64_t const loop_count = p->count; TIME(_mzd_mul_naive(C, A, B, clear)); mzd_free(A); mzd_free(B); mzd_free(C); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(_mzd_mul_va) { mzd_t *const A = mzd_init(p->m, p->n); mzd_t *const V = mzd_init(1, p->m); mzd_t *const C = mzd_init(1, p->n); mzd_randomize(A); mzd_randomize(V); int const clear = p->boolean; uint64_t const loop_count = p->count; TIME(_mzd_mul_va(C, V, A, clear)); mzd_free(A); mzd_free(V); mzd_free(C); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_gauss_delayed) { mzd_t **A = malloc(sizeof(mzd_t) * (p->count + 1)); rci_t const cola = p->col[0]; int const full = p->boolean; uint64_t const loop_count = p->count; rci_t result; for (int i = loop_count; i >= 0; --i) { A[i] = mzd_init(p->m, p->n); mzd_randomize(A[i]); } TIME_BEGIN(result = mzd_gauss_delayed(A[0], cola, full)); for (int i = loop_count; i > 0; --i) { result = mzd_gauss_delayed(A[i], cola, full); } TIME_END; for (int i = 0; i <= loop_count; ++i) mzd_free(A[i]); free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_echelonize_naive) { mzd_t **A = malloc(sizeof(mzd_t) * (p->count + 1)); int const full = p->boolean; uint64_t const loop_count = p->count; rci_t result; for (int i = loop_count; i >= 0; --i) { A[i] = mzd_init(p->m, p->n); mzd_randomize(A[i]); } TIME_BEGIN(result = mzd_echelonize_naive(A[0], full)); for (int i = loop_count; i > 0; --i) { result = mzd_echelonize_naive(A[i], full); } TIME_END; for (int i = 0; i <= loop_count; ++i) mzd_free(A[i]); free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_equal) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); mzd_t *const B = mzd_copy(NULL, A); uint64_t const loop_count = p->count; int volatile result; TIME(result = mzd_equal(A, B)); mzd_free(A); mzd_free(B); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_cmp) { mzd_t *const A = mzd_init(p->m, p->n); 
mzd_randomize(A); mzd_t *const B = mzd_copy(NULL, A); uint64_t const loop_count = p->count; int volatile result; TIME(result = mzd_cmp(A, B)); mzd_free(A); mzd_free(B); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_copy) { mzd_t *const A = mzd_init(p->m, p->n); mzd_t *const B = mzd_init(p->m, p->n); mzd_randomize(A); uint64_t const loop_count = p->count; TIME(mzd_copy(B, A)); mzd_free(A); mzd_free(B); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_concat) { mzd_t *const A = mzd_init(p->m, p->k); mzd_t *const B = mzd_init(p->m, p->l); mzd_t *const C = mzd_init(p->m, p->k + p->l); mzd_randomize(A); mzd_randomize(B); uint64_t const loop_count = p->count; TIME(mzd_concat(C, A, B)); mzd_free(A); mzd_free(B); mzd_free(C); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_stack) { mzd_t *const A = mzd_init(p->k, p->n); mzd_t *const B = mzd_init(p->l, p->n); mzd_t *const C = mzd_init(p->k + p->l, p->n); mzd_randomize(A); mzd_randomize(B); uint64_t const loop_count = p->count; TIME(mzd_stack(C, A, B)); mzd_free(A); mzd_free(B); mzd_free(C); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_submatrix) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); rci_t const rowa = p->row[0]; rci_t const cola = p->col[0]; rci_t const rowb = p->row[1]; rci_t const colb = p->col[1]; mzd_t *const S = mzd_init(rowb - rowa, colb - cola); uint64_t const loop_count = p->count; TIME(mzd_submatrix(S, A, rowa, cola, rowb, colb)); mzd_free(A); mzd_free(S); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_invert_naive) { mzd_t *const A = mzd_init(p->m, p->m); mzd_t *const I = mzd_init(p->m, p->m); mzd_t *const C = mzd_init(p->m, p->m); mzd_randomize(A); mzd_set_ui(I, 1); uint64_t const loop_count = p->count; TIME(mzd_invert_naive(C, A, I)); mzd_free(A); mzd_free(I); mzd_free(C); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_add) { mzd_t *const A = mzd_init(p->m, p->n); mzd_t *const B = mzd_init(p->m, p->n); mzd_t *const C = mzd_init(p->m, p->n); mzd_randomize(A); mzd_randomize(B); uint64_t const loop_count = p->count; 
TIME(mzd_add(C, A, B)); mzd_free(A); mzd_free(B); mzd_free(C); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(_mzd_add) { mzd_t *const A = mzd_init(p->m, p->n); mzd_t *const B = mzd_init(p->m, p->n); mzd_t *const C = mzd_init(p->m, p->n); mzd_randomize(A); mzd_randomize(B); uint64_t const loop_count = p->count; TIME(_mzd_add(C, A, B)); mzd_free(A); mzd_free(B); mzd_free(C); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_combine) { mzd_t *const A = mzd_init(p->m, p->n); mzd_t *const B = mzd_init(p->m, p->n); mzd_t *const C = mzd_init(p->m, p->n); mzd_randomize(A); mzd_randomize(B); rci_t row1 = p->row[0]; rci_t row2 = p->row[1]; rci_t row3 = p->row[2]; wi_t startblock = p->wrd[0]; uint64_t const loop_count = p->count; TIME(mzd_combine(C, row3, startblock, A, row1, startblock, B, row2, startblock)); mzd_free(A); mzd_free(B); mzd_free(C); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_read_bits) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); vA = A; vrowa = p->row[0]; vcola = p->col[0]; vn = p->integer; uint64_t const loop_count = p->count; TIME(vword = mzd_read_bits(vA, vrowa, vcola, vn)); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_read_bits_int) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); vA = A; vrowa = p->row[0]; vcola = p->col[0]; vn = p->integer; uint64_t const loop_count = p->count; TIME(vint = mzd_read_bits_int(vA, vrowa, vcola, vn)); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_xor_bits) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); vA = A; vrowa = p->row[0]; vcola = p->col[0]; vn = p->integer; vword = 0xffffffffffffffffULL; uint64_t const loop_count = p->count; TIME(mzd_xor_bits(vA, vrowa, vcola, vn, vword)); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_and_bits) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); vA = A; vrowa = p->row[0]; vcola = p->col[0]; vn = p->integer; vword = 0xffffffffffffffffULL; uint64_t const loop_count = p->count; TIME(mzd_and_bits(vA, vrowa, vcola, vn, vword)); mzd_free(A); 
} BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_clear_bits) { mzd_t *volatile A = mzd_init(p->m, p->n); mzd_randomize(A); vA = A; vrowa = p->row[0]; vcola = p->col[0]; vn = p->integer; uint64_t const loop_count = p->count; TIME(mzd_clear_bits(vA, vrowa, vcola, vn)); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_is_zero) { mzd_t *const A = mzd_init(p->m, p->n); uint64_t const loop_count = p->count; int volatile result; TIME(result = mzd_is_zero(A)); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_find_pivot) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); rci_t row = p->row[0]; rci_t col = p->col[0]; uint64_t const loop_count = p->count; int volatile result; rci_t row_out; rci_t col_out; TIME(result = mzd_find_pivot(A, row, col, &row_out, &col_out)); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_density) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); wi_t res = p->wrd[0]; uint64_t const loop_count = p->count; double volatile result; TIME(result = mzd_density(A, res)); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(_mzd_density) { mzd_t *const A = mzd_init(p->m, p->n); mzd_randomize(A); rci_t row = p->row[0]; rci_t col = p->col[0]; wi_t res = p->wrd[0]; uint64_t const loop_count = p->count; double volatile result; TIME(result = _mzd_density(A, res, row, col)); mzd_free(A); } BENCHMARK_POSTFIX BENCHMARK_PREFIX(mzd_first_zero_row) { mzd_t *const A = mzd_init(p->m, p->m); mzd_set_ui(A, 1); uint64_t const loop_count = p->count; rci_t volatile result; TIME(result = mzd_first_zero_row(A)); mzd_free(A); } BENCHMARK_POSTFIX // Returns a number proportional with the ideal number of // mathematical operations for the given code. double complexity1(struct test_params *p, char code) { switch (code) { case 'k': return p->k; // Linear with size 'k' of a matrix. case 'l': return p->l; // Linear with size 'l' of a matrix. case 'm': return p->m; // Linear with the number of rows of the matrix. 
case 'n': return p->n; // Linear with the number of columns of the matrix. case 'W': assert(p->n > m4ri_radix * p->wrd[0]); // Linear with the number of processed columns. return p->n - m4ri_radix * p->wrd[0]; case 'D': assert(p->row[0] < p->row[1]); return p->row[1] - p->row[0]; // Linear with the number of rows between start_row and stop_row. case 'E': assert(p->col[0] < p->col[1]); return p->col[1] - p->col[0]; // Linear with the number of cols between start_col and stop_col. case 'C': assert(p->col[0] < p->n); return p->n - p->col[0]; // Linear with the number of columns of column col and beyond. } return 0.0; } char const *complexity1_human(struct test_params *p, char code) { switch (code) { case 'k': return "k"; case 'l': return "l"; case 'm': return "m"; case 'n': return "n"; case 'W': return "cols"; case 'D': return "rows"; case 'E': return "cols"; case 'C': return "cols"; } return "UNKNOWN"; } double complexity(struct test_params *p, char const *cp) { double c = 1; while (*cp) { c *= complexity1(p, *cp); ++cp; } return c; } void print_complexity_human(struct test_params *p, char const *cp) { int first = 1; char last_cp = 0; int power = 0; while (*cp) { if (*cp != last_cp) { if (power > 1) printf("^%d", power); if (!first && isupper(*cp)) printf("*"); printf("%s", complexity1_human(p, *cp)); power = 0; last_cp = *cp; } ++power; ++cp; } if (power > 1) printf("^%d", power); } struct function_st { char const *funcname; run_type run_func; char const *input_codes; char const *complexity_code; uint64_t count; }; typedef struct function_st function_st; static function_st function_mapper[] = { {"_mzd_row_swap", run__mzd_row_swap, "Rmn,ri,ri,wi", "W", 1000000000}, {"mzd_row_swap", run_mzd_row_swap, "Rmn,ri,ri", "n", 1000000000}, {"mzd_copy_row", run_mzd_copy_row, "Omn,ri,R,ri", "n", 1000000000}, {"mzd_col_swap", run_mzd_col_swap, "Rmn,ci,ci", "m", 10000000}, {"mzd_col_swap_in_rows", run_mzd_col_swap_in_rows, "Rmn,ci,ci,ri,ri", "D", 10000000}, {"mzd_read_bit", 
run_mzd_read_bit, "Rmn,ri,ci", "", 100000000}, {"mzd_write_bit", run_mzd_write_bit, "Omn,ri,ci", "", 100000000}, {"mzd_row_add_offset", run_mzd_row_add_offset, "Rmn,ri,ri,ci", "C", 100000000}, {"mzd_row_add", run_mzd_row_add, "Rmn,ri,ri", "n", 100000000}, {"mzd_transpose", run_mzd_transpose, "Onm,Rmn", "mn", 10000000}, {"mzd_mul_naive", run_mzd_mul_naive, "Omn,Rml,Rln", "mnl", 10000000}, {"mzd_addmul_naive", run_mzd_addmul_naive, "Omn,Rml,Rln", "mnl", 10000000}, {"_mzd_mul_naive", run__mzd_mul_naive, "Omn,Rml,Rnl,b", "mnl", 10000000}, {"_mzd_mul_va", run__mzd_mul_va, "O1n,V1m,Amn,b", "mn", 1000000000}, {"mzd_gauss_delayed", run_mzd_gauss_delayed, "Rmn,ci,b", "mC", 10000000}, {"mzd_echelonize_naive", run_mzd_echelonize_naive, "Rmn,b", "mn", 10000000}, {"mzd_equal", run_mzd_equal, "Rmn,Rmn", "mn", 1000000000}, {"mzd_cmp", run_mzd_cmp, "Rmn,Rmn", "mn", 1000000000}, {"mzd_copy", run_mzd_copy, "Omn,Rmn", "mn", 1000000000}, {"mzd_concat", run_mzd_concat, "Omn,Rmk,Rml", "mn", 10000000}, {"mzd_stack", run_mzd_stack, "Omn,Rkn,Rln", "mn", 1000000000}, {"mzd_submatrix", run_mzd_submatrix, "O,Rmn,ri,ci,ri,ci", "DE", 10000000}, {"mzd_invert_naive", run_mzd_invert_naive, "Omm,Rmm,Imm", "mmm", 10000000}, {"mzd_add", run_mzd_add, "Omn,Rmn,Rmn", "mn", 10000000}, {"_mzd_add", run__mzd_add, "Omn,Rmn,Rmn", "mn", 10000000}, {"mzd_combine", run_mzd_combine, "Omn,ri,wi,R,ri,R,ri", "W", 10000000}, {"mzd_read_bits", run_mzd_read_bits, "Rmn,ri,ci,n", "", 10000000}, {"mzd_read_bits_int", run_mzd_read_bits_int, "Rmn,ri,ci,n", "", 10000000}, {"mzd_xor_bits", run_mzd_xor_bits, "Rmn,ri,ci,n,w", "", 10000000}, {"mzd_and_bits", run_mzd_and_bits, "Rmn,ri,ci,n,w", "", 10000000}, {"mzd_clear_bits", run_mzd_clear_bits, "Rmn,ri,ci,n", "", 10000000}, {"mzd_is_zero", run_mzd_is_zero, "Rmn", "mn", 10000000}, {"mzd_find_pivot", run_mzd_find_pivot, "Rmn,ri,ci", "", 1000000}, {"mzd_density", run_mzd_density, "Rmn,wi", "", 10000000}, {"_mzd_density", run__mzd_density, "Rmn,wi,ri,ci", "", 10000000}, 
{"mzd_first_zero_row", run_mzd_first_zero_row, "Rmm", "m", 10000000000}, {"nothing", run_bench_nothing, "", "", 1}}; int decode_size(char var, struct test_params *params, int *argcp, char ***argvp) { if (*argcp < 1) { fprintf(stderr, "%s: Not enough arguments. Expected matrix size: %c\n", progname, var); return 1; } --(*argcp); switch (var) { case 'k': params->k = atoi((*argvp)[0]); break; case 'l': params->l = atoi((*argvp)[0]); break; case 'm': params->m = atoi((*argvp)[0]); break; case 'n': params->n = atoi((*argvp)[0]); break; } ++(*argvp); return 0; } int decode_index(char idx, struct test_params *params, int *argcp, char ***argvp) { if (*argcp < 1) { int count = 0; switch (idx) { case 'r': count = params->rows; break; case 'c': count = params->cols; break; case 'w': count = params->wrds; break; } fprintf(stderr, "%s: Not enough arguments. Expected ", progname); switch (idx) { case 'r': fprintf(stderr, "row"); break; case 'c': fprintf(stderr, "column"); break; case 'w': fprintf(stderr, "word"); break; } fprintf(stderr, " index : %c%d\n", idx, count + 1); return 1; } --(*argcp); switch (idx) { case 'r': params->row[params->rows++] = atoi((*argvp)[0]); break; case 'c': params->col[params->cols++] = atoi((*argvp)[0]); break; case 'w': params->wrd[params->wrds++] = atoi((*argvp)[0]); break; } ++(*argvp); return 0; } int decode_code(char idx, struct test_params *params, int *argcp, char ***argvp) { if (*argcp < 1) { fprintf(stderr, "%s: Not enough arguments. 
Expected ", progname); switch (idx) { case 'b': printf("boolean"); break; case 'n': printf("integer"); break; default: printf("%c", idx); } fprintf(stderr, ".\n"); return 1; } --(*argcp); switch (idx) { case 'b': params->boolean = atoi((*argvp)[0]); if (params->boolean != 0 && params->boolean != 1) { fprintf(stderr, "%s: Expected boolean: %s\n", progname, (*argvp)[0]); return 1; } break; case 'n': params->integer = atoi((*argvp)[0]); break; } ++(*argvp); return 0; } int main(int argc, char **argv) { int opts = global_options(&argc, &argv); struct test_params params; unsigned long long data[8]; int data_len; #ifdef HAVE_LIBPAPI int papi_counters = PAPI_num_counters(); if (papi_counters < papi_array_len) { fprintf(stderr, "%s: Warning: there are only %d hardware counters available!\n", progname, papi_counters); papi_array_len = papi_counters; } int res = PAPI_start_counters((int *)papi_events, papi_array_len); switch (res) { case 0: { long long *tmp = (long long *)malloc(papi_array_len * sizeof(long long)); PAPI_stop_counters(tmp, papi_array_len); free(tmp); break; } case PAPI_ECNFLCT: { fprintf(stderr, "%s: %s: Conflicting event: The underlying counter hardware cannot count the specified " "events simultaneously.\n", progname, papi_event_name(papi_events[papi_array_len - 1])); fprintf(stderr, "Run `papi_event_chooser PRESET"); for (int nv = 0; nv < papi_array_len - 1; ++nv) fprintf(stderr, " %s", papi_event_name(papi_events[nv])); fprintf(stderr, "` to get a list of possible events that can be added.\n"); break; } case PAPI_ENOEVNT: { for (int nv = 0; nv < papi_array_len; ++nv) if ((res = PAPI_query_event(papi_events[nv])) != PAPI_OK) { fprintf(stderr, "%s: PAPI_start_counters: %s: %s.\n", progname, papi_event_name(papi_events[nv]), PAPI_strerror(res)); break; } break; } case PAPI_ESYS: fprintf(stderr, "%s: PAPI_start_counters: %s\n", progname, strerror(errno)); break; default: fprintf(stderr, "%s: PAPI_start_counters: %s.\n", progname, PAPI_strerror(res)); break; } 
if (res) return 1; for (int nv = 0; nv <= papi_array_len; ++nv) loop_calibration[nv] = 100000000; params.count = 1; data_len = papi_array_len + 1; for (int i = 0; i < 100; ++i) run_bench_nothing((void *)¶ms, data, &data_len); #endif int f; int found = 0; params.rows = 0; params.cols = 0; params.wrds = 0; params.cutoff = -1; if (argc >= 2) { params.funcname = argv[1]; for (f = 0; f < sizeof(function_mapper) / sizeof(function_mapper[0]); ++f) { if (strcmp(params.funcname, function_mapper[f].funcname) == 0) { found = 1; break; } } } if (!found) { if (argc >= 2) fprintf(stderr, "%s: function name \"%s\" not found.\n", progname, params.funcname); else { fprintf(stderr, "Usage: %s [OPTIONS] [ARGS]\n", progname); bench_print_global_options(stderr); } fprintf(stderr, "Possible values for :\n"); for (f = 0; f < sizeof(function_mapper) / sizeof(function_mapper[0]); ++f) { if (f != 0 && f % 4 == 0) fprintf(stderr, "\n"); fprintf(stderr, "%-22s", function_mapper[f].funcname); } fprintf(stderr, "\n"); return 1; } argc -= 2; // argc >= 1 if more arguments. argv += 2; // Next argument in argv[0] char *input_codes = strdup(function_mapper[f].input_codes); char *input_code[10]; char *p = input_codes; int codes = 0; while (*p) { input_code[codes++] = p++; while (*p && *p != ',') ++p; if (*p == ',') *p++ = '\0'; } int saw_var[4]; for (int var_index = 0; var_index < 4; ++var_index) saw_var[var_index] = 0; int saw_vars = 0; char usage[64]; char *usage_ptr = usage; int error = 0; for (int c = 0;; ++c) { if (c < codes) { p = input_code[c]; if (isupper(*p)) { while (*++p) { if (*p != '1') { int var_index = *p - 'k'; assert(var_index >= 0 && var_index <= 3); // 'k', 'l', 'm' or 'n'. 
saw_var[var_index] = 1; saw_vars = 1; } } continue; } } if (saw_vars) { saw_vars = 0; for (int var_count = 2; var_count < 6; ++var_count) { int var_index = var_count % 4; if (saw_var[var_index] == 1) { *usage_ptr++ = ' '; *usage_ptr++ = 'k' + var_index; saw_var[var_index] = 2; if (!error && decode_size('k' + var_index, ¶ms, &argc, &argv)) error = 1; } } } if (c == codes) break; if (p[1] == 'i') { *usage_ptr++ = ' '; *usage_ptr++ = *p; switch (*p) { case 'r': *usage_ptr++ = '1' + params.rows; if (error) ++params.rows; break; case 'c': *usage_ptr++ = '1' + params.cols; if (error) ++params.cols; break; case 'w': *usage_ptr++ = '1' + params.wrds; if (error) ++params.wrds; break; } if (!error && decode_index(*p, ¶ms, &argc, &argv)) error = 1; } else { *usage_ptr++ = ' '; *usage_ptr++ = *p; if (!error && decode_code(*p, ¶ms, &argc, &argv)) error = 1; } } *usage_ptr = '\0'; if (argc != 0) error = 1; if (error) { if (argc != 0) fprintf(stderr, "%s %s: too many parameters.\n", progname, params.funcname); fprintf(stderr, "Usage: %s [OPTIONS] %s%s\n", progname, params.funcname, usage); if (opts <= 0) bench_print_global_options(stderr); return 1; } double cost = complexity(¶ms, function_mapper[f].complexity_code); params.count = bench_count ? 
bench_count : function_mapper[f].count / cost; if (params.count < 1) params.count = 1; bench_count = params.count; srandom(17); data_len = run_bench(function_mapper[f].run_func, (void *)¶ms, data, sizeof(data) / sizeof(unsigned long long)); printf("function: %s, count: %" PRId64 ", ", params.funcname, params.count); if (saw_var[2]) printf("m: %d, ", params.m); if (saw_var[3]) printf("n: %d, ", params.n); if (saw_var[0]) printf("k: %d, ", params.k); if (saw_var[1]) printf("l: %d, ", params.l); for (int i = 0; i < 3; ++i) { if (i < params.rows) printf("row%c: %d, ", 'a' + i, params.row[i]); if (i < params.cols) printf("col%c: %d, ", 'a' + i, params.col[i]); if (i < params.wrds) printf("word%c: %d, ", 'a' + i, params.wrd[i]); } if (params.cutoff != -1) printf("cutoff: %d, ", params.cutoff); print_wall_time(data[0] / 1000000.0 / params.count); printf(", cpu cycles: %llu", (data[1] + params.count / 2) / params.count); #ifndef HAVE_LIBPAPI printf(", cc/"); print_complexity_human(¶ms, function_mapper[f].complexity_code); printf(": %f\n", data[1] / (params.count * cost)); #else printf("\n"); for (int n = 1; n < data_len; ++n) { printf("%s (%f) per bit (divided by ", papi_event_name(papi_events[n - 1]), (double)data[n] / params.count); print_complexity_human(¶ms, function_mapper[f].complexity_code); printf("): %f\n", data[n] / (params.count * cost)); } #endif } m4ri-release-20240729/bench/bench_ple.c000066400000000000000000000047771465170556500174550ustar00rootroot00000000000000#include #include "benchmarking.h" #include "cpucycles.h" #include #include struct pluq_params { rci_t m; rci_t n; rci_t r; char *what; }; int run(void *_p, unsigned long long *data, int *data_len) { struct pluq_params *p = (struct pluq_params *)_p; *data_len = 2; mzd_t *A = mzd_init(p->m, p->n); mzd_t *U; mzd_t *L; int halfrank = 0; if (halfrank) { U = mzd_init(p->m, p->n); L = mzd_init(p->m, p->m); mzd_randomize(U); mzd_randomize(L); #if 0 for (rci_t i = 0; i < p->m; ++i) { for (rci_t j = i + 1; j 
< p->m; ++j) mzd_write_bit(L,i,j, 0); mzd_write_bit(L,i,i, 1); } for(rci_t i = 0; i < MIN(p->m, p->n); ++i) { for (rci_t j = 0; j < i; ++j) mzd_write_bit(U,i,j, 0); mzd_write_bit(U,i,i, 1); } #endif for (rci_t i = 0; i < p->m; ++i) { mzd_write_bit(U, i, i, 1); for (rci_t j = 0; j < i; ++j) mzd_write_bit(U, i, j, 0); if ((i % 2)) for (rci_t j = i; j < p->n; ++j) mzd_write_bit(U, i, j, 0); for (rci_t j = i + 1; j < p->m; ++j) mzd_write_bit(L, i, j, 0); mzd_write_bit(L, i, i, 1); } mzd_mul(A, L, U, 0); } else { mzd_randomize(A); } mzp_t *P = mzp_init(p->m); mzp_t *Q = mzp_init(p->n); data[0] = walltime(0); data[1] = cpucycles(); if (strcmp(p->what, "pluq")) p->r = mzd_pluq(A, P, Q, 0); else if (strcmp(p->what, "ple")) p->r = mzd_ple(A, P, Q, 0); else m4ri_die("Unknown task '%s'", p->what); data[0] = walltime(data[0]); data[1] = cpucycles() - data[1]; mzd_free(A); mzp_free(P); mzp_free(Q); if (halfrank) { mzd_free(U); mzd_free(L); } return 0; } int main(int argc, char **argv) { int opts = global_options(&argc, &argv); if (opts < 0) { bench_print_global_options(stderr); exit(-1); } if (argc != 4) { printf("Parameters m,n,what expected.\n"); printf(" m -- integer > 0\n"); printf(" n -- integer > 0\n"); printf(" what -- PLUQ or PLE.\n"); printf("\n"); bench_print_global_options(stderr); m4ri_die(""); } struct pluq_params p; p.m = atoi(argv[1]); p.n = atoi(argv[2]); p.what = argv[3]; srandom(17); unsigned long long data[2]; run_bench(run, (void *)&p, data, 2); printf("m: %5d, n: %5d, what: %s, r: %5d, cpu cycles: %12llu, ", p.m, p.n, p.what, p.r, data[1]); print_wall_time(data[0] / 1000000.0); printf(", "); print_cpu_time(data[1] / (double)cpucycles_persecond()); printf("\n"); } m4ri-release-20240729/bench/bench_rank.c000066400000000000000000000146221465170556500176160ustar00rootroot00000000000000#include #include "benchmarking.h" #include "cpucycles.h" #include #include #ifdef HAVE_LIBPAPI #define _GNU_SOURCE #include #include #include // papi.h needs caddr_t #endif 
struct elim_params { rci_t m; rci_t n; rci_t r; char const *algorithm; }; static unsigned long long loop_calibration[32]; int run_nothing(void *_p, unsigned long long *data, int *data_len) { struct elim_params *p = (struct elim_params *)_p; mzd_t *A = mzd_init(p->m, p->n); if (p->r != 0) { mzd_t *L, *U; L = mzd_init(p->m, p->m); U = mzd_init(p->m, p->n); mzd_randomize(U); mzd_randomize(L); for (rci_t i = 0; i < p->m; ++i) { for (rci_t j = i + 1; j < p->m; j += m4ri_radix) { int const length = MIN(m4ri_radix, p->m - j); mzd_clear_bits(L, i, j, length); } mzd_write_bit(L, i, i, 1); for (rci_t j = 0; j < i && j < p->n; j += m4ri_radix) { int const length = MIN(m4ri_radix, i - j); mzd_clear_bits(U, i, j, length); } if (i < p->r) { mzd_write_bit(U, i, i, 1); } else { for (rci_t j = i; j < p->n; j += m4ri_radix) { int const length = MIN(m4ri_radix, p->n - j); mzd_clear_bits(U, i, j, length); } } } mzd_mul(A, L, U, 0); mzd_free(L); mzd_free(U); } else { mzd_randomize(A); } #ifndef HAVE_LIBPAPI *data_len = 2; #else *data_len = MIN(papi_array_len + 1, *data_len); #endif int papi_res; #ifndef HAVE_LIBPAPI data[0] = walltime(0); data[1] = cpucycles(); #else int array_len = *data_len - 1; unsigned long long t0 = PAPI_get_virt_usec(); papi_res = PAPI_start_counters((int *)papi_events, array_len); if (papi_res) m4ri_die(""); #endif #ifndef HAVE_LIBPAPI data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); #else PAPI_stop_counters((long long *)&data[1], array_len); t0 = PAPI_get_virt_usec() - t0; data[0] = t0; for (int nv = 0; nv <= array_len; ++nv) { if (data[nv] < loop_calibration[nv]) loop_calibration[nv] = data[nv]; } #endif mzd_free(A); return (0); } int run(void *_p, unsigned long long *data, int *data_len) { struct elim_params *p = (struct elim_params *)_p; #ifndef HAVE_LIBPAPI *data_len = 2; #else *data_len = MIN(papi_array_len + 1, *data_len); #endif int papi_res; mzd_t *A = mzd_init(p->m, p->n); if (p->r != 0) { mzd_t *L, *U; L = mzd_init(p->m, p->m); U = 
mzd_init(p->m, p->n); mzd_randomize(U); mzd_randomize(L); for (rci_t i = 0; i < p->m; ++i) { for (rci_t j = i + 1; j < p->m; j += m4ri_radix) { int const length = MIN(m4ri_radix, p->m - j); mzd_clear_bits(L, i, j, length); } mzd_write_bit(L, i, i, 1); for (rci_t j = 0; j < i && j < p->n; j += m4ri_radix) { int const length = MIN(m4ri_radix, i - j); mzd_clear_bits(U, i, j, length); } if (i < p->r) { mzd_write_bit(U, i, i, 1); } else { for (rci_t j = i; j < p->n; j += m4ri_radix) { int const length = MIN(m4ri_radix, p->n - i); mzd_clear_bits(U, i, j, length); } } } mzd_mul(A, L, U, 0); mzd_free(L); mzd_free(U); } else { mzd_randomize(A); } mzp_t *P = mzp_init(A->nrows); mzp_t *Q = mzp_init(A->ncols); #ifndef HAVE_LIBPAPI data[0] = walltime(0); data[1] = cpucycles(); #else int array_len = *data_len - 1; unsigned long long t0 = PAPI_get_virt_usec(); papi_res = PAPI_start_counters((int *)papi_events, array_len); if (papi_res) m4ri_die(""); #endif if (strcmp(p->algorithm, "m4ri") == 0) p->r = mzd_echelonize_m4ri(A, 0, 0); else if (strcmp(p->algorithm, "ple") == 0) p->r = mzd_ple(A, P, Q, 0); else if (strcmp(p->algorithm, "mmpf") == 0) p->r = _mzd_ple_russian(A, P, Q, 0); else m4ri_die("unknown algorithm %s", p->algorithm); #ifndef HAVE_LIBPAPI data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); #else mzp_free(P); mzp_free(Q); PAPI_stop_counters((long long *)&data[1], array_len); t0 = PAPI_get_virt_usec() - t0; data[0] = t0; for (int nv = 0; nv <= array_len; ++nv) { data[nv] -= loop_calibration[nv]; } #endif mzd_free(A); return 0; } void print_help_and_exit() { printf("Parameters m(, n, alg, r) expected.\n"); printf(" m -- integer > 0\n"); printf(" n -- integer > 0\n"); printf(" alg -- 'm4ri', 'ple', or 'mmpf' (default: 'ple')\n"); printf(" r -- target rank >= 0, if 0 then mzd_randomize() is called (default: MIN(m,n))\n"); printf("\n"); bench_print_global_options(stderr); m4ri_die(""); } int main(int argc, char **argv) { int opts = global_options(&argc, &argv); 
int data_len; #ifdef HAVE_LIBPAPI int papi_counters = PAPI_num_counters(); if (papi_counters < papi_array_len) { fprintf(stderr, "%s: Warning: there are only %d hardware counters available!\n", progname, papi_counters); papi_array_len = papi_counters; } if (papi_test(papi_events, papi_array_len)) exit(1); for (int nv = 0; nv <= papi_array_len; ++nv) loop_calibration[nv] = 100000000; data_len = papi_array_len + 1; #else data_len = 2; #endif if (opts < 0 || argc < 2 || argc > 5) { print_help_and_exit(); } struct elim_params params; params.m = atoi(argv[1]); if (argc >= 3) params.n = atoi(argv[2]); else params.n = params.m; if (argc >= 4) params.algorithm = argv[3]; else params.algorithm = "ple"; if (argc >= 5) params.r = atoi(argv[4]); else params.r = MIN(params.m, params.n); srandom(17); unsigned long long data[16]; for (int i = 0; i < 4; ++i) run_nothing((void *)¶ms, data, &data_len); run_bench(run, (void *)¶ms, data, data_len); double cc_per_op = ((double)data[1]) / ((double)params.m * (double)params.n * powl((double)params.r, 0.807)); printf("m: %5d, n: %5d, last r: %5d, cpu cycles: %12llu, cc/(mnr^0.807): %.5lf, ", params.m, params.n, params.r, data[1], cc_per_op); print_wall_time(data[0] / 1000000.0); printf(", "); print_cpu_time(data[1] / (double)cpucycles_persecond()); printf("\n"); #ifdef HAVE_LIBPAPI for (int n = 1; n < data_len; ++n) { double tmp = ((double)data[n]) / powl((double)params.n, 2.807); printf("%20s (%20llu) per bit (divided by n^2.807): %15.5f\n", papi_event_name(papi_events[n - 1]), data[n], tmp); } #endif } m4ri-release-20240729/bench/bench_trsm.c000066400000000000000000000047461465170556500176560ustar00rootroot00000000000000#include #include "benchmarking.h" #include "cpucycles.h" #include #include struct trsm_params { rci_t m; rci_t n; int upper; int left; char const *algorithm; }; mzd_t *mzd_random_lower(const rci_t n) { mzd_t *A = mzd_init(n, n); mzd_randomize(A); for (rci_t i = 0; i < n; i++) mzd_write_bit(A, i, i, 1); mzd_t *L = 
mzd_extract_l(NULL, A); mzd_free(A); return L; } mzd_t *mzd_random_upper(const rci_t n) { mzd_t *A = mzd_init(n, n); mzd_randomize(A); for (rci_t i = 0; i < n; i++) mzd_write_bit(A, i, i, 1); mzd_t *U = mzd_extract_u(NULL, A); mzd_free(A); return U; } int run(void *_p, unsigned long long *data, int *data_len) { struct trsm_params *p = (struct trsm_params *)_p; *data_len = 2; mzd_t *T = NULL; mzd_t *B = mzd_init(p->m, p->n); mzd_randomize(B); if (p->upper) { T = mzd_random_upper(p->m); } else { T = mzd_random_lower(p->m); } data[0] = walltime(0); data[1] = cpucycles(); switch (2 * p->upper + p->left) { case 3: mzd_trsm_upper_left(T, B, 0); break; case 2: mzd_trsm_upper_right(T, B, 0); break; case 1: mzd_trsm_lower_left(T, B, 0); break; case 0: mzd_trsm_lower_right(T, B, 0); break; default: m4ri_die("Parameters for upper (=%d) or left (=%d) not supported", p->upper, p->left); } data[0] = walltime(data[0]); data[1] = cpucycles() - data[1]; mzd_free(B); mzd_free(T); return 0; } int main(int argc, char **argv) { int opts = global_options(&argc, &argv); if (opts < 0) { bench_print_global_options(stderr); exit(-1); } if (argc != 5) { printf("Parameters m,n,upper,left expected.\n"); printf(" m -- integer > 0\n"); printf(" n -- integer > 0\n"); printf(" upper -- 1 for upper triangular, 0 for lower triangular.\n"); printf(" left -- 1 for triangular matrix on left, 0 for right\n"); printf("\n"); bench_print_global_options(stderr); exit(-1); } struct trsm_params p; p.m = atoi(argv[1]); p.n = atoi(argv[2]); p.upper = atoi(argv[3]); p.left = atoi(argv[4]); srandom(17); unsigned long long data[2]; run_bench(run, (void *)&p, data, 2); /** this has no meaning if m << n **/ double cc_per_op = (4 * (double)data[1]) / (p.m * powl((double)p.n, 1.807)); printf("m: %5d, n: %5d, upper: %d, left: %d, cpu cycles: %llu, cc/(n^2.807): %.5lf, wall time: " "%lf\n", p.m, p.n, p.upper, p.left, data[1], cc_per_op, data[0] / 1000000.0); } 
m4ri-release-20240729/bench/benchmarking.c000066400000000000000000000551511465170556500201560ustar00rootroot00000000000000/* * benchmarking.c * * Benchmark engine. * * Copyright (C) 2011 Carlo Wood * RSA-1024 0x624ACAD5 1997-01-26 Sign & Encrypt * Fingerprint16 = 32 EC A7 B6 AC DB 65 A6 F6 F6 55 DD 1C DC FF 61 * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * Example usage: * * ./bench_elimination -s 0 -m 4 -c 90 -a 0.005 -d -t 30 -n 1000 1000 1000 * * would run at most 30 seconds (-t) or 1000 times (-n), whichever comes * first, or stop once the real average of wall time (-s 0) falls with 90% * certainty (-c) in a range that is +/- 0.005 times the observed mean (-a: accuracy), * but not before at least 4 (-m: minimum) measurements have been * done. It would also print (-d: dump) each measurement (0:microseconds 1:cpuclocks). * * Example output. * * 2416 6441500 * 2376 6335490 * 2360 6294450 * 2361 6295280 * 2371 6321440 * 2350 6266740 * 2362 6298700 * 2386 6362520 * 2344 6249890 * 2347 6260450 * 2346 6254590 * Total running time: 0.103 seconds. * Virtual time (s): Sample size: 11; mean: 0.002365; standard deviation: 0.000021 * Virtual time (s): 90% confidence interval: +/- 0.000012 (0.5%): [0.002354..0.002377] * * The last three lines can be suppressed by passing the option -q (quiet).
*/ #include #ifdef HAVE_LIBPAPI #define _GNU_SOURCE #include #include #include // papi.h needs caddr_t #endif #include "benchmarking.h" #include #include #include #include #include #include #include enum { C80, C90, C95, C98, C99 }; /* * Command line option decoding */ int bench_quiet = 0; // Set if -q is used. int bench_dump = 0; // Set if -d is used. int bench_minimum = 2; // Minimum number of measurements. Set with -m . int bench_maximum = 1000; // Maximum number of measurements. Set with -n . unsigned long long bench_maxtime = 60000000; // Maximum number of microseconds to run. Set with -t // , in seconds (floating point). double bench_accuracy = 0.01; // The +/- range (where 1.0 is 100%) within that we want the real population mean to be // with the given confidence. Set with -a int bench_confidence_index = C99; // The confidence that the real mean is within the given (or found) range. int bench_stats = 1; // The counter used for statistics (0 = realtime, 1 = cpuclocks). Set with -s . int bench_dump_counter = -1; // The counter to dump (see bench_stats). Set with -d . If // not given all counters are dumped. char const *progname; // Set to argv[0]. /* * Command line option used by bench_packedmatrix.c */ uint64_t bench_count = 0; // Can be set by -x , otherwise a reasonable default is being used. #ifdef HAVE_LIBPAPI int bench_disregard_L2_misses = 0; // Set if -2 is used. /* * PAPI events being counted. */ int papi_events[32] = { PAPI_TOT_CYC, /* Total cycles. This must always be the first entry. */ }; int papi_array_len = 1; int bench_PAPI_L2_TCM_index; char *papi_event_name(int event) { // PAPI needs to be initialized before calling PAPI_event_code_to_name. 
if (PAPI_is_initialized() == PAPI_NOT_INITED) { int res = PAPI_library_init(PAPI_VER_CURRENT); if (res != PAPI_OK && res != PAPI_VER_CURRENT) { fprintf(stderr, "%s: PAPI_library_init: error code %d %s\n", progname, res, PAPI_strerror(res)); m4ri_die("PAPI failed to initialize.\n"); } } static char buf[PAPI_MAX_STR_LEN]; int res = PAPI_event_code_to_name(event, buf); if (res) snprintf(buf, PAPI_MAX_STR_LEN, "", event); return buf; } int papi_add_event(char const *event_name) { // PAPI needs to be initialized before calling PAPI_event_name_to_code. if (PAPI_is_initialized() == PAPI_NOT_INITED) { int res = PAPI_library_init(PAPI_VER_CURRENT); if (res != PAPI_OK && res != PAPI_VER_CURRENT) { fprintf(stderr, "%s: PAPI_library_init: error code %d %s\n", progname, res, PAPI_strerror(res)); m4ri_die("PAPI failed to initialize.\n"); } } int event; int res = PAPI_event_name_to_code((char *)event_name, &event); if (res != PAPI_OK) { if (res == PAPI_ENOEVNT) fprintf(stderr, "%s: %s: No such event.\n", progname, event_name); else fprintf(stderr, "%s: PAPI_event_name_to_code(\"%s\"): %s\n", progname, event_name, PAPI_strerror(res)); return res; } int found = 0; for (int nv = 0; nv < papi_array_len; ++nv) { if (papi_events[nv] == event) { found = 1; break; } } if (!found) papi_events[papi_array_len++] = event; return 0; } void papi_add_events(char *event_names) { char *tmpptr; char *name = strtok_r(event_names, ", ", &tmpptr); while (name) { papi_add_event(name); name = strtok_r(NULL, ", ", &tmpptr); } } #endif // HAVE_LIBPAPI int global_options(int *argcp, char ***argvp) { int result = 0; progname = (*argvp)[0]; while ((*argcp) > 1) { if ((*argvp)[1][0] != '-' || (*argvp)[1][1] == '\0' || (*argvp)[1][2] != '\0') return result; switch ((*argvp)[1][1]) { case 'd': bench_dump = 1; if (isdigit((*argvp)[2][0])) { ++*argvp; --*argcp; bench_dump_counter = atoi((*argvp)[1]); } break; case 'q': bench_quiet = 1; break; #ifdef HAVE_LIBPAPI case '2': { bench_disregard_L2_misses = 1; if 
(papi_add_event("PAPI_L2_TCM")) { fprintf(stderr, "%s: Ignoring -2: Level 2 cache misses cannot be detected with the current set of " "PAPI events (-p).\n", progname); bench_disregard_L2_misses = 0; } for (int nv = 0; nv < papi_array_len; ++nv) { if (papi_events[nv] == PAPI_L2_TCM) { bench_PAPI_L2_TCM_index = nv + 1; // +1 for in data[] inserted virtual time at index 0. break; } } break; } case 'p': { ++*argvp; --*argcp; papi_add_events((*argvp)[1]); break; } #endif case 'm': ++*argvp; --*argcp; bench_minimum = atoi((*argvp)[1]); break; case 'n': ++*argvp; --*argcp; bench_maximum = atoi((*argvp)[1]); if (bench_maximum < bench_minimum) bench_minimum = bench_maximum; break; case 't': ++*argvp; --*argcp; bench_maxtime = 1000000 * strtod((*argvp)[1], NULL); break; case 'a': ++*argvp; --*argcp; bench_accuracy = strtod((*argvp)[1], NULL); break; case 'c': { ++*argvp; --*argcp; int confidence = atoi((*argvp)[1]); switch (confidence) { case 80: bench_confidence_index = C80; break; case 90: bench_confidence_index = C90; break; case 95: bench_confidence_index = C95; break; case 98: bench_confidence_index = C98; break; case 99: bench_confidence_index = C99; break; default: m4ri_die("The only possible confidence percentages are 80, 90, 95, 98 and 99%\n"); break; } break; } case 'x': ++*argvp; --*argcp; bench_count = atoll((*argvp)[1]); break; case 's': ++*argvp; --*argcp; bench_stats = atoi((*argvp)[1]); break; default: return -1; } ++result; ++*argvp; --*argcp; } return result; } void bench_print_global_options(FILE *out) { fprintf(out, "OPTIONS\n"); fprintf(out, " -m Do at least number of measurements. Default 2.\n"); fprintf(out, " -n Do at most number of measurements. Default 1000.\n"); fprintf(out, " -t Stop after seconds. Default 60.0 seconds.\n"); fprintf(out, " -a Stop after has been reached. Default 0.01 (= 1%%).\n"); fprintf(out, " -c Stop when accuracy has been reached with this confidence. 
/*
 * vector implementation: a growable array of doubles.
 *
 * vector_create:       Create vector with room for s elements (s may be 0).
 * vector_destruct:     Destruct vector.
 * vector_resize:       Resize internal allocation.
 * vector_size:         Return number of elements.
 * vector_pushback:     Add one element at the end.
 * vector_get:          Get element at position index.
 *
 * Allocation failures abort the program; there is no meaningful way to
 * continue a benchmark without its sample storage.
 */

struct vector_st {
  size_t alloc_size;  // Number of doubles that fit in data.
  size_t size;        // Number of doubles in use.
  double *data;
};

typedef struct vector_st *vector;

vector vector_create(size_t s) {
  vector v = (vector)malloc(sizeof(struct vector_st));
  if (v == NULL) abort();
  v->alloc_size = s;
  v->data       = s ? (double *)malloc(sizeof(double) * s) : NULL;
  if (s && v->data == NULL) abort();
  v->size = 0;
  return v;
}

void vector_destruct(vector v) {
  free(v->data);
  free(v);
}

void vector_resize(vector v, size_t s) {
  if (s == 0) {
    /* realloc(ptr, 0) is not portable; release the storage explicitly. */
    free(v->data);
    v->data = NULL;
  } else {
    double *grown = (double *)realloc(v->data, sizeof(double) * s);
    if (grown == NULL) abort();
    v->data = grown;
  }
  v->alloc_size = s;
  /* Shrinking below the current size truncates the vector. */
  if (v->size > v->alloc_size) v->size = v->alloc_size;
}

static inline size_t vector_size(vector v) { return v->size; }

void vector_pushback(vector v, double d) {
  if (v->size == v->alloc_size) {
    /* Double the capacity; a zero capacity must grow to at least one slot
     * (0 * 2 would stay 0 and the store below would be out of bounds). */
    vector_resize(v, v->alloc_size ? v->alloc_size * 2 : 1);
  }
  v->data[v->size++] = d;
}

static inline double vector_get(vector v, int index) { return v->data[index]; }
struct normal_st {
  int size;      // Number of samples the statistics were computed from.
  double mean;   // Sample mean (after applying the multiplier).
  double sigma;  // Sample standard deviation (n - 1 in the denominator).
};

typedef struct normal_st normal;

/*
 * Calculate the mean and standard deviation of the data in vector v, with
 * every sample scaled by multiplier.
 *
 * Returns -1 when v is empty (dist->mean and dist->sigma are then set to 0),
 * 0 otherwise.  A single sample yields its own value as mean and a sigma
 * of 0.
 */
int normal_calculate(vector v, normal *dist, double multiplier) {
  dist->size = vector_size(v);
  if (dist->size < 1) {
    /* No data: do not read element 0 of an empty vector. */
    dist->mean  = 0.0;
    dist->sigma = 0.0;
    return -1;
  }
  if (dist->size < 2) {
    dist->mean  = vector_get(v, 0) * multiplier;
    dist->sigma = 0.0;
    return 0;
  }
  // Calculate the sum of all data.
  double sum = 0;
  for (int i = 0; i < dist->size; ++i) sum += vector_get(v, i) * multiplier;
  dist->mean = sum / dist->size;
  // Calculate the sum of the square of all differences with mean.
  sum = 0;
  for (int i = 0; i < dist->size; ++i) {
    double delta = vector_get(v, i) * multiplier - dist->mean;
    sum += delta * delta;
  }
  dist->sigma = sqrt(sum / (dist->size - 1));
  return 0;
}
2.617, 2.576}}; static float student_t_certainty[5] = {0.2, 0.1, 0.05, 0.02, 0.01}; // Two-tails. static float t_table(int confidence_index, int freedoms) { if (freedoms <= 30) return student_t[confidence_index][freedoms - 1]; double a, b, y1, y2, y3; long x1, x2; long x3 = 0; int i; if (freedoms <= 60) { i = 29; x1 = 30; x2 = 40; x3 = 60; } else if (freedoms <= 120) { i = 30; x1 = 40; x2 = 60; x3 = 120; } else { i = 31; x1 = 60; x2 = 120; /* x3 = infinity */ } y1 = student_t[confidence_index][i]; y2 = student_t[confidence_index][i + 1]; y3 = student_t[confidence_index][i + 2]; if (freedoms <= 120) { double c, d; d = (x1 * x1 * (x3 - x2) + x2 * x2 * (x1 - x3) + x3 * x3 * (x2 - x1)); a = -(x1 * (y3 - y2) + x2 * (y1 - y3) + x3 * (y2 - y1)) / d; b = (x1 * x1 * (y3 - y2) + x2 * x2 * (y1 - y3) + x3 * x3 * (y2 - y1)) / d; c = y2 - a * x2 * x2 - b * x2; return (a * freedoms * freedoms + b * freedoms + c); } double ln1, ln2; ln1 = log(y2 - y3); ln2 = log(y1 - y3); a = -(ln1 - ln2) / (x1 - x2); b = (x1 * ln1 - x2 * ln2) / (x1 - x2); return (y3 + exp(a * freedoms + b)); } /* * walltime */ unsigned long long walltime(unsigned long long t0) { static time_t base_sec; struct timeval tp; gettimeofday(&tp, NULL); if (__M4RI_UNLIKELY(base_sec == 0)) base_sec = tp.tv_sec; return (tp.tv_sec - base_sec) * 1000000 + tp.tv_usec - t0; } /* * Printing doubles. 
*/ int bench_precision(double sigma) { if (sigma < 1E-10) return 12; int log_sigma = log10(sigma); if (log_sigma >= 2) return 0; return 2 - log_sigma; } void print_double(double d, int precision) { switch (precision) { case 0: printf("%.0f", d); break; case 1: printf("%.1f", d); break; case 2: printf("%.2f", d); break; case 3: printf("%.3f", d); break; case 4: printf("%.4f", d); break; case 5: printf("%.5f", d); break; case 6: printf("%.6f", d); break; case 7: printf("%.7f", d); break; case 8: printf("%.8f", d); break; case 9: printf("%.9f", d); break; case 10: printf("%.10f", d); break; case 11: printf("%.11f", d); break; case 12: printf("%.12f", d); break; } } /* * run_bench * * Benchmark main loop. */ int run_bench(int (*f)(void *params, unsigned long long *data, int *data_len), void *params, unsigned long long *data, int data_len) { double const CONFIDENCE = 1.0 - student_t_certainty[bench_confidence_index]; unsigned long long data_sum[32]; memset(data_sum, 0, sizeof(data_sum)); data_len = MIN(data_len, sizeof(data_sum) / sizeof(unsigned long long)); vector stats_data = vector_create(128); normal stats; #ifdef HAVE_LIBPAPI int total_calls = 0; #endif if (!bench_count) bench_count = 1; unsigned long long start_walltime = walltime(0); for (int n = 1; n <= bench_maximum; ++n) { if (!bench_quiet && !bench_dump) { printf("."); fflush(stdout); } do { int res = f(params, data, &data_len); if (res < 0) m4ri_die("benchmark function failed with exit code: %d\n", res); #ifdef HAVE_LIBPAPI ++total_calls; #endif } #ifdef HAVE_LIBPAPI while (bench_disregard_L2_misses && data[bench_PAPI_L2_TCM_index]); #else while (0); #endif if (bench_dump) { if (bench_dump_counter >= 0 && bench_dump_counter < data_len) printf("%llu", data[bench_dump_counter]); else { printf("%llu", data[0]); for (int nv = 1; nv < data_len; ++nv) printf(" %llu", data[nv]); } printf("\n"); fflush(stdout); } vector_pushback(stats_data, data[bench_stats]); for (int nv = 0; nv < data_len; ++nv) data_sum[nv] += 
data[nv]; if (n >= bench_minimum && normal_calculate(stats_data, &stats, (bench_stats == 0) ? 0.000001 : (1.0 / bench_count)) == 0) { double standard_error = stats.sigma / sqrt(stats.size); double critical_value = t_table(bench_confidence_index, stats.size - 1); // Stop when the real mean lays with CONFIDENCE in the range [mean * (1 - bench_accuracy), // mean * (1 + bench_accuracy)]. or when we're already running bench_maxtime seconds. if (standard_error * critical_value / stats.mean <= bench_accuracy || walltime(start_walltime) > bench_maxtime) break; } } for (int nv = 0; nv < data_len; ++nv) data[nv] = (data_sum[nv] + stats.size / 2) / stats.size; if (!bench_quiet) { if (!bench_quiet && !bench_dump) printf("\n"); printf("Total running time: %6.3f seconds.\n", walltime(start_walltime) / 1000000.0); #ifdef HAVE_LIBPAPI if (bench_disregard_L2_misses) printf("Samples disregarded because of level 2 cache misses: %d\n", total_calls - stats.size); #endif int precision = bench_precision(stats.sigma); #ifdef HAVE_LIBPAPI if (bench_stats) printf("%s: ", papi_event_name(papi_events[bench_stats - 1])); else printf("Virtual time (s): "); #endif printf("Sample size: %d; mean: ", stats.size); print_double(stats.mean, precision); printf("; standard deviation: "); print_double(stats.sigma, precision); printf("\n"); #ifdef HAVE_LIBPAPI if (bench_stats) printf("%s: ", papi_event_name(papi_events[bench_stats - 1])); else printf("Virtual time (s): "); #endif double standard_error = stats.sigma / sqrt(stats.size); double critical_value = t_table(bench_confidence_index, stats.size - 1); double accuracy = standard_error * critical_value; printf("%2.0f%% confidence interval: +/- ", CONFIDENCE * 100); print_double(accuracy, precision); printf(" (%.1f%%): [", accuracy / stats.mean * 100); print_double(stats.mean - accuracy, precision); printf(".."); print_double(stats.mean + accuracy, precision); printf("]\n"); } vector_destruct(stats_data); return data_len; } /* * Randomize */ // The same 
as m4ri_random_word. Duplicated here because it's // not available in older revisions that we want to benchmark against. word bench_random_word() { // random() only returns 31 bits, so we need three calls. word a0 = random(); word a1 = random(); word a2 = random(); word v = a0 ^ (a1 << 24) ^ a2 << 48; #ifdef BENCH_RANDOM_REVERSE v = ((v >> 1) & 0x5555555555555555ULL) | ((v & 0x5555555555555555ULL) << 1); v = ((v >> 2) & 0x3333333333333333ULL) | ((v & 0x3333333333333333ULL) << 2); v = ((v >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((v & 0x0F0F0F0F0F0F0F0FULL) << 4); v = ((v >> 8) & 0x00FF00FF00FF00FFULL) | ((v & 0x00FF00FF00FF00FFULL) << 8); v = ((v >> 16) & 0x0000FFFF0000FFFFULL) | ((v & 0x0000FFFF0000FFFFULL) << 16); v = (v >> 32) | (v << 32); #endif return v; } // Needed for mzd_t. #include // The same as m4ri_randomize. Duplicated here because it's // not available in older revisions that we want to benchmark against. void bench_randomize(mzd_t *A) { wi_t const width = A->width - 1; int const offset = 0; word const mask_end = __M4RI_LEFT_BITMASK(A->ncols % m4ri_radix); for (rci_t i = 0; i < A->nrows; ++i) { word *row = mzd_row(A, i); for (wi_t j = 0; j < width; ++j) row[j] = bench_random_word(); row[width] ^= (row[width] ^ bench_random_word()) & mask_end; } } /* * Random number generator */ static uint64_t bench_random_M; static uint64_t bench_random_modulo; void bench_random_init(uint64_t modulo) { // Set bench_random_M to the largest multiple of modulo, minus one, that fits in an uint64_t. // A modulo of zero is interpreted as 2^64, and thus returns 0xffffffffffffffff. bench_random_M = modulo ? -modulo / modulo * modulo - 1 : -1; bench_random_M += modulo; bench_random_modulo = modulo; } // Returns a uniformly distributed random number in the range [0, bench_random_modulo>. 
/*
 * Returns a uniformly distributed random number in the range
 * [0, bench_random_modulo>, using the modulo set by bench_random_init().
 *
 * Random words above bench_random_M are rejected so that every residue is
 * equally likely.  A modulo of zero stands for 2^64 (see bench_random_init),
 * in which case the random word is returned unchanged; the original computed
 * R % 0 here, which is a division by zero.
 */
uint64_t bench_random() {
  if (bench_random_modulo == 0) return bench_random_word();
  for (;;) {
    word R = bench_random_word();
    if (R <= bench_random_M) return R % bench_random_modulo;
  }
}
BENCHMARKETING_H #include #include /* * Command line options. See benchmarking.h for documentation. */ extern int bench_quiet; extern int bench_dump; extern int bench_minimum; extern int bench_maximum; extern unsigned long long bench_maxtime; extern double bench_accuracy; extern int bench_confidence_index; extern char const *progname; extern uint64_t bench_count; unsigned long long walltime(unsigned long long t0); int global_options(int *argcp, char ***argvp); void bench_print_global_options(FILE *); int run_bench(int (*f)(void *params, unsigned long long *data, int *data_len), void *params, unsigned long long *data, int data_len); #ifdef HAVE_LIBPAPI extern int papi_events[]; extern int papi_array_len; char *papi_event_name(int event); int papi_test(int *papi_events, int papi_array_len); #endif void print_wall_time(double seconds); void print_cpu_time(double seconds); #endif // BENCHMARKETING_H m4ri-release-20240729/bench/cpucycles-20060326/000077500000000000000000000000001465170556500202455ustar00rootroot00000000000000m4ri-release-20240729/bench/cpucycles-20060326/alpha.c000066400000000000000000000027351465170556500215050ustar00rootroot00000000000000/* cpucycles/alpha.c version 20060316 D. J. Bernstein Public domain. 
*/ #include #include #include static long long tod(void) { struct timeval t; gettimeofday(&t,(struct timezone *) 0); return t.tv_sec * (long long) 1000000 + t.tv_usec; } static long long rpcc(void) { unsigned long long t; asm volatile("rpcc %0" : "=r"(t)); return t & 0xffffffff; } static long long firstrpcc; static long long firsttod; static long long lastrpcc; static long long lasttod; static double mhz = 0; static void init(void) { firstrpcc = rpcc(); firsttod = tod(); do { lastrpcc = rpcc(); lasttod = tod(); } while (lasttod - firsttod < 10000); lastrpcc -= firstrpcc; lastrpcc &= 0xffffffff; lasttod -= firsttod; mhz = (double) lastrpcc / (double) lasttod; } long long cpucycles_alpha(void) { double x; long long y; if (!mhz) init(); lastrpcc = rpcc(); lasttod = tod(); lastrpcc -= firstrpcc; lastrpcc &= 0xffffffff; lasttod -= firsttod; /* Number of cycles since firstrpcc is lastrpcc + 2^32 y for unknown y. */ /* Number of microseconds since firsttod is lasttod. */ x = (lasttod * mhz - lastrpcc) * 0.00000000023283064365386962890625; y = x; while (x > y + 0.5) y += 1; while (x < y - 0.5) y -= 1; y *= 4294967296ULL; lastrpcc += y; mhz = (double) lastrpcc / (double) lasttod; return firstrpcc + lastrpcc; } long long cpucycles_alpha_persecond(void) { if (!mhz) init(); return 1000000.0 * mhz; } m4ri-release-20240729/bench/cpucycles-20060326/alpha.h000066400000000000000000000007171465170556500215100ustar00rootroot00000000000000/* cpucycles alpha.h version 20060318 D. J. Bernstein Public domain. 
/*
 * Report the CPU frequency in cycles per second, taken from the first
 * "cpu MHz" entry of /proc/cpuinfo.
 *
 * Returns 0 when the file cannot be opened or contains no such entry.
 */
long long cpucycles_amd64cpuinfo_persecond(void)
{
  double mhz = 0;
  FILE *cpuinfo = fopen("/proc/cpuinfo","r");

  if (cpuinfo == NULL) return 0;

  while (1) {
    int matched = fscanf(cpuinfo,"cpu MHz : %lf",&mhz);
    if (matched > 0) break;                      /* found the entry */
    /* No match on this line: skip it.  End of file, here or while skipping,
       means there is no entry at all. */
    if (matched < 0 || fscanf(cpuinfo,"%*[^\n]\n") < 0) {
      mhz = 0;
      break;
    }
  }

  fclose(cpuinfo);
  return 1000000.0 * mhz;
}
*/ #ifndef CPUCYCLES_amd64cpuinfo_h #define CPUCYCLES_amd64cpuinfo_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_amd64cpuinfo(void); extern long long cpucycles_amd64cpuinfo_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "amd64cpuinfo" #define cpucycles cpucycles_amd64cpuinfo #define cpucycles_persecond cpucycles_amd64cpuinfo_persecond #endif #endif m4ri-release-20240729/bench/cpucycles-20060326/amd64tscfreq.c000066400000000000000000000006371465170556500227220ustar00rootroot00000000000000#include #include long long cpucycles_amd64tscfreq(void) { unsigned long long result; asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" : "=a" (result) :: "%rdx"); return result; } long long cpucycles_amd64tscfreq_persecond(void) { long result = 0; size_t resultlen = sizeof(long); sysctlbyname("machdep.tsc_freq",&result,&resultlen,0,0); return result; } m4ri-release-20240729/bench/cpucycles-20060326/amd64tscfreq.h000066400000000000000000000010071465170556500227170ustar00rootroot00000000000000/* cpucycles amd64tscfreq.h version 20060318 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_amd64tscfreq_h #define CPUCYCLES_amd64tscfreq_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_amd64tscfreq(void); extern long long cpucycles_amd64tscfreq_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "amd64tscfreq" #define cpucycles cpucycles_amd64tscfreq #define cpucycles_persecond cpucycles_amd64tscfreq_persecond #endif #endif m4ri-release-20240729/bench/cpucycles-20060326/clockmonotonic.c000066400000000000000000000012761465170556500234400ustar00rootroot00000000000000#include #include #include #include #include #include static double cpufrequency = 0; static void init(void) { long result = 0; size_t resultlen = sizeof(long); sysctlbyname("machdep.tsc_freq",&result,&resultlen,0,0); cpufrequency = result; } long long cpucycles_clockmonotonic(void) { double result; struct timespec t; if (!cpufrequency) init(); clock_gettime(CLOCK_MONOTONIC,&t); result = t.tv_nsec; result *= 0.000000001; result += (double) t.tv_sec; result *= cpufrequency; return result; } long long cpucycles_clockmonotonic_persecond(void) { if (!cpufrequency) init(); return cpufrequency; } m4ri-release-20240729/bench/cpucycles-20060326/clockmonotonic.h000066400000000000000000000010271465170556500234370ustar00rootroot00000000000000/* cpucycles clockmonotonic.h version 20060318 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_clockmonotonic_h #define CPUCYCLES_clockmonotonic_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_clockmonotonic(void); extern long long cpucycles_clockmonotonic_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "clockmonotonic" #define cpucycles cpucycles_clockmonotonic #define cpucycles_persecond cpucycles_clockmonotonic_persecond #endif #endif m4ri-release-20240729/bench/cpucycles-20060326/compile000077500000000000000000000012731465170556500216260ustar00rootroot00000000000000#!/bin/sh case "$COMPILER" in suncc) case "$ARCHITECTURE" in 64) /opt/SUNWspro/bin/cc -xarch=v9 -O2 "$@" ;; 32) /opt/SUNWspro/bin/cc -xarch=v8 -O2 "$@" ;; *) /opt/SUNWspro/bin/cc -O2 "$@" ;; esac ;; ibmcc) case "$ARCHITECTURE" in 64) xlc -q64 -O2 "$@" ;; 32) xlc -q32 -O2 "$@" ;; *) xlc -O2 "$@" ;; esac ;; hpcc) case "$ARCHITECTURE" in 64) /opt/ansic/bin/cc +DD64 -O2 "$@" ;; 32) /opt/ansic/bin/cc +DD32 -O2 "$@" ;; *) /opt/ansic/bin/cc -O2 "$@" ;; esac ;; *) case "$ARCHITECTURE" in 64) gcc -m64 -O2 "$@" ;; 32) gcc -m32 -O2 "$@" ;; *) gcc -O2 "$@" ;; esac ;; esac m4ri-release-20240729/bench/cpucycles-20060326/cpucycles.html000066400000000000000000000316651465170556500231400ustar00rootroot00000000000000 cpucycles: counting CPU cycles

cpucycles: counting CPU cycles

A C or C++ program can call cpucycles() to receive a long long cycle count. The program has to
     #include "cpucycles.h"
and link to cpucycles.o. The program can look at the constant string cpucycles_implementation to see which implementation of cpucycles it's using. The program can also call cpucycles_persecond() to receive a long long estimate of the number of cycles per second.

Here's how to create cpucycles.h and cpucycles.o:

     wget http://ebats.cr.yp.to/cpucycles-20060326.tar.gz
     gunzip < cpucycles-20060326.tar.gz | tar -xf -
     cd cpucycles-20060326
     sh do
The do script creates cpucycles.h and cpucycles.o. It also prints one line of output showing the implementation selected, the number of cycles per second, a double-check of the number of cycles per second, and the differences between several adjacent calls to the cpucycles() function.

Some systems have multiple incompatible formats for executable programs. The most important reason is that some CPUs (the Athlon 64, for example, and the UltraSPARC) have two incompatible modes, a 32-bit mode and a 64-bit mode. On these systems, you can run

     env ARCHITECTURE=32 sh do
to create a 32-bit cpucycles.o or
     env ARCHITECTURE=64 sh do
to create a 64-bit cpucycles.o.

Notes on accuracy

Benchmarking tools are encouraged to record several timings of a function: call cpucycles(), function(), cpucycles(), function(), etc., and then print one line reporting the differences between successive cpucycles() results. The median of several differences is much more stable than the average.

Cycle counts continue to increase while other programs are running, while the operating system is handling an interrupt (for example, one raised by an incoming network packet), etc. This won't affect the median of several timings of a fast function---the function usually won't be interrupted---but it can affect the median of several timings of a slow function. Hopefully a benchmarking machine isn't running other programs.

On dual-CPU systems (and dual-core systems such as the Athlon 64 X2), the CPUs often don't have synchronized cycle counters, so a process that switches CPUs can have its cycle counts jump forwards or backwards. I've never seen this affect the median of several timings.

Some CPUs dynamically reduce CPU speed to save power, but deliberately keep their cycle counters running at full speed, the idea being that measuring time is more important than measuring cycles. Hopefully a benchmarking machine won't enter power-saving mode.

Cycle counts are occasionally off by a multiple of 2^32 on some CPUs, as discussed below. I've never seen this affect the median of several timings.

The estimate returned by cpucycles_persecond() may become more accurate after cpucycles() has been called repeatedly.

Implementations

alpha. The Alpha's built-in cycle-counting function counts cycles modulo 2^32. cpucycles usually manages to fix this by calling gettimeofday (which takes a large but low-variance number of cycles) and automatically estimating the chip speed. In extreme situations the resulting cycle counts could still be off by a multiple of 2^32.

Results on td161: alpha 499845359 499838717 423 360 336 349 353 348 469 329 348 345 348 345 348 345 348 345 348 345 348 348 348 345 348 345 348 345 348 348 348 345 348 345 348 345 348 348 348 345 348 345 348 345 348 348 348 345 348 345 348 345 348 348 348 468 318 348 345 348 345 348 345 348 345 348

amd64cpuinfo. cpucycles uses the CPU's RDTSC instruction to count cycles, and reads /proc/cpuinfo to see the kernel's estimate of cycles per second.

Results on dancer with ARCHITECTURE=64 (default): amd64cpuinfo 2002653000 2002526765 22 9 9 8 8 17 6 10 5 9 8 8 8 17 6 10 5 9 8 8 8 17 6 10 5 9 8 8 8 17 6 10 5 9 8 8 11 14 15 28 10 8 9 12 23 106 10 8 8 8 8 8 8 17 6 10 5 9 8 8 8 17 6 10

amd64tscfreq. cpucycles uses the CPU's RDTSC instruction to count cycles, and uses sysctlbyname("machdep.tsc_freq",...) to see the kernel's estimate of cycles per second.

clockmonotonic. Backup option, using the POSIX clock_gettime(CLOCK_MONOTONIC) function to count nanoseconds and using sysctlbyname("machdep.tsc_freq",...) to see the kernel's estimate of cycles per second. This often has much worse than microsecond precision.

Results on whisper (artificially induced): clockmonotonic 1298904202 1298866469 2177 1815 2177 2177 1814 2177 2178 2177 1814 2178 2177 1814 2177 2177 1815 2177 2177 1814 2177 2179 1813 2178 2177 1815 2177 2177 1814 2177 2177 2177 1815 2178 2177 1813 2178 2177 1815 2177 2177 1814 2177 2177 1815 2177 2177 1814 2177 2179 2177 1814 2177 2177 2177 1815 2177 2177 1814 2177 2178 1814 2178 2177 1814 2177

gettimeofday. Backup option, using the POSIX gettimeofday() function to count microseconds and /proc/cpuinfo to see the kernel's estimate of cycles per second. This often has much worse than microsecond precision.

Results on dancer (artificially induced) with ARCHITECTURE=32: gettimeofday 2002653000 2002307748 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 4005 0 4005 2003 0 4005 2003 2002 2003 2003 2002 2003 2003 2002 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 4005 0 2003 4005 0 4006 2002 2003

Results on dancer (artificially induced) with ARCHITECTURE=64 (default): gettimeofday 2002653000 2002293956 2560 1792 2048 1792 2048 2304 1792 2048 1792 0 2048 2304 2048 1792 1792 2048 0 2304 2048 1792 2048 1792 2304 0 2048 1792 2048 1792 2048 0 2304 2048 1792 1792 2048 2304 2048 0 1792 2048 1792 2304 2048 1792 2048 0 1792 2560 1792 1792 2048 1792 0 2560 25600 2048 1792 2560 1792 0 2048 1792 2048 2304

hppapstat. cpucycles uses the CPU's MFCTL %cr16 instruction to count cycles, and pstat(PSTAT_PROCESSOR,...) to see the kernel's estimate of cycles per second.

Results on hp400: hppapstat 440000000 439994653 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11

powerpcaix. cpucycles uses the CPU's MFTB instruction to count "time base"; uses /usr/sbin/lsattr -E -l proc0 -a frequency to see the kernel's estimate of cycles per second; and spends some time comparing MFTB to gettimeofday() to figure out the number of time-base counts per second.

I've seen a 533MHz PowerPC G4 (7410) with a 16-cycle time base; a 668MHz POWER RS64 IV (SStar) system with a 1-cycle time base; a 1452MHz POWER with an 8-cycle time base; and a 2000MHz PowerPC G5 (970) with a 60-cycle time base.

Results on tigger: powerpcaix 1452000000 1451981436 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64

powerpclinux. cpucycles uses the CPU's MFTB instruction to count "time base"; reads /proc/cpuinfo to see the kernel's estimate of cycles per second; and spends some time comparing MFTB to gettimeofday() to figure out the number of time-base counts per second.

Results on gggg: powerpclinux 533000000 532650134 48 32 48 32 32 48 32 32 48 32 32 48 32 32 48 32 32 48 32 32 32 48 32 32 48 32 32 48 32 32 48 32 32 48 32 32 32 48 32 32 48 32 32 48 32 32 48 32 32 48 32 32 32 48 32 32 48 32 32 48 32 32 48 32

powerpcmacos. cpucycles uses the mach_absolute_time function to count "time base"; uses sysctlbyname("hw.cpufrequency",...) to see the kernel's estimate of cycles per second; and uses sysctlbyname("hw.tbfrequency",...) to see the kernel's estimate of time-base counts per second.

Results on geespaz with ARCHITECTURE=32 (default): powerpcmacos 2000000000 1999891801 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60

Results on geespaz with ARCHITECTURE=64: powerpcmacos 2000000000 1999896339 420 60 60 60 60 60 60 0 60 60 60 60 60 60 60 60 0 60 60 60 60 60 60 60 60 60 0 60 60 60 60 60 60 60 60 60 0 60 60 60 60 60 60 60 0 60 60 60 60 60 60 60 60 0 60 60 60 60 60 60 60 60 0 60

sparc32psrinfo. cpucycles uses the CPU's RDTICK instruction in 32-bit mode to count cycles, and runs /usr/sbin/psrinfo -v to see the kernel's estimate of cycles per second.

Results on icarus with ARCHITECTURE=32 (default): sparc32psrinfo 900000000 899920056 297 23 23 18 22 23 18 17 22 18 17 22 23 18 17 129 17 17 17 17 17 17 17 97 17 17 17 17 17 17 17 85 17 17 17 17 17 17 17 97 17 17 17 17 17 17 17 85 17 17 17 17 17 17 17 97 17 17 17 17 17 17 17 85

Results on wessel with ARCHITECTURE=32 (default): sparc32psrinfo 900000000 899997269 39 23 18 22 18 25 72 17 22 18 17 22 23 26 71 17 17 17 17 17 17 17 85 17 17 17 17 17 17 17 97 17 17 17 17 17 17 17 85 17 17 17 17 17 17 17 97 17 17 17 17 17 17 17 85 17 17 17 17 17 17 17 109 17

sparcpsrinfo. cpucycles uses the CPU's RDTICK instruction in 64-bit mode to count cycles, and runs /usr/sbin/psrinfo -v to see the kernel's estimate of cycles per second.

Results on icarus with ARCHITECTURE=64: sparcpsrinfo 900000000 899920264 289 12 12 12 12 12 12 19 12 113 19 12 12 12 12 12 12 130 12 12 12 12 12 12 12 144 12 12 12 12 12 12 12 144 12 12 12 12 12 12 12 144 12 12 12 12 12 12 12 144 12 12 12 12 12 12 12 144 12 12 12 12 12 12

Results on wessel with ARCHITECTURE=64: sparcpsrinfo 900000000 899997032 29 19 12 19 19 19 12 12 123 12 12 12 12 12 12 12 174 12 12 12 12 12 12 12 174 12 12 12 12 12 12 12 174 12 12 12 12 12 12 12 174 12 12 12 12 12 12 12 174 12 12 12 12 12 12 12 174 12 12 12 12 12 12 12

x86cpuinfo. cpucycles uses the CPU's RDTSC instruction to count cycles, and reads /proc/cpuinfo to see the kernel's estimate of cycles per second. There have been reports of the 64-bit cycle counters on some x86 CPUs being occasionally off by 2^32; cpucycles makes no attempt to fix this.

Results on cruncher: x86cpuinfo 132957999 132951052 60 36 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32

Results on dali: x86cpuinfo 448882000 448881565 49 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45

Results on dancer with ARCHITECTURE=32: x86cpuinfo 2002653000 2002538651 26 11 9 11 10 17 11 10 10 10 9 10 9 12 9 173 11 10 10 10 10 17 11 10 10 10 9 10 9 17 11 10 10 10 9 10 9 17 11 10 10 10 9 10 9 17 11 10 10 10 9 10 9 17 11 10 10 10 9 10 9 17 11 10

Results on fireball: x86cpuinfo 1894550999 1894188944 104 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88

Results on neumann: x86cpuinfo 999534999 999456935 49 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44

Results on rzitsc: x86cpuinfo 2799309000 2799170567 132 96 100 104 100 100 96 96 96 100 96 108 104 104 112 96 112 96 108 96 112 96 96 96 100 112 120 100 96 100 104 112 96 96 96 88 96 128 108 96 116 96 100 100 108 96 100 96 108 96 104 100 112 96 100 96 100 100 88 108 100 108 92 96

Results on shell: x86cpuinfo 3391548999 3391341751 108 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88

Results on thoth: x86cpuinfo 900447000 900028758 67 19 18 18 19 188 16 16 16 19 19 18 19 147 16 16 16 19 19 17 16 16 16 16 16 19 19 17 16 16 16 16 16 19 19 17 16 16 16 16 16 19 19 18 19 156 16 16 16 19 19 18 19 147 16 16 16 19 19 18 19 147 16 16

x86tscfreq. cpucycles uses the CPU's RDTSC instruction to count cycles, and uses sysctlbyname("machdep.tsc_freq",...) to see the kernel's estimate of cycles per second.

Results on whisper: x86tscfreq 1298904202 1298892874 72 72 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53

Version

This is the cpucycles-20060326.html web page. This web page is in the public domain. m4ri-release-20240729/bench/cpucycles-20060326/do000066400000000000000000000021701465170556500205720ustar00rootroot00000000000000#!/bin/sh output="cpucycles.o cpucycles.h" cleanup="test cpucycles-impl.o cpucycles-impl.h cpucycles-impl.c" exec 2>do.notes rm -f $output $cleanup ( echo amd64tscfreq gcc echo amd64cpuinfo gcc echo x86tscfreq gcc echo x86cpuinfo gcc echo powerpclinux gcc echo powerpcmacos gcc echo powerpcaix gcc echo powerpcaix ibmcc echo sparcpsrinfo gcc echo sparcpsrinfo suncc echo sparc32psrinfo gcc echo sparc32psrinfo suncc echo hppapstat gcc echo hppapstat hpcc echo alpha gcc echo clockmonotonic gcc echo gettimeofday gcc ) | ( while read name compiler do echo ===== Trying $name.c with $compiler... >&2 rm -f $cleanup cp $name.c cpucycles-impl.c || continue cp $name.h cpucycles-impl.h || continue env COMPILER=$compiler ./compile -c cpucycles-impl.c || continue env COMPILER=$compiler ./compile -o test test.c cpucycles-impl.o || continue ./test || continue echo ===== Success. Using $name.c. >&2 mv cpucycles-impl.o cpucycles.o mv cpucycles-impl.h cpucycles.h rm -f $cleanup exit 0 done echo ===== Giving up. 
>&2 rm -f $output $cleanup exit 111 ) m4ri-release-20240729/bench/cpucycles-20060326/gettimeofday.c000066400000000000000000000014651465170556500231000ustar00rootroot00000000000000#include #include #include #include static double cpufrequency = 0; static void init(void) { FILE *f; double result; int s; f = fopen("/proc/cpuinfo","r"); if (!f) return; for (;;) { s = fscanf(f,"cpu MHz : %lf",&result); if (s > 0) break; if (s == 0) s = fscanf(f,"%*[^\n]\n"); if (s < 0) { result = 0; break; } } fclose(f); cpufrequency = 1000000.0 * result; } long long cpucycles_gettimeofday(void) { double result; struct timeval t; if (!cpufrequency) init(); gettimeofday(&t,(struct timezone *) 0); result = t.tv_usec; result *= 0.000001; result += (double) t.tv_sec; result *= cpufrequency; return result; } long long cpucycles_gettimeofday_persecond(void) { if (!cpufrequency) init(); return cpufrequency; } m4ri-release-20240729/bench/cpucycles-20060326/gettimeofday.h000066400000000000000000000010071465170556500230750ustar00rootroot00000000000000/* cpucycles gettimeofday.h version 20060318 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_gettimeofday_h #define CPUCYCLES_gettimeofday_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_gettimeofday(void); extern long long cpucycles_gettimeofday_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "gettimeofday" #define cpucycles cpucycles_gettimeofday #define cpucycles_persecond cpucycles_gettimeofday_persecond #endif #endif m4ri-release-20240729/bench/cpucycles-20060326/hppapstat.c000066400000000000000000000010421465170556500224120ustar00rootroot00000000000000#include #include #include #include #include #include long long cpucycles_hppapstat(void) { register long long result; _MFCTL(16,result); return result; } long long cpucycles_hppapstat_persecond(void) { struct pst_processor pst; union pstun pu; double result; pu.pst_processor = &pst; if (pstat(PSTAT_PROCESSOR,pu,sizeof(pst),1,0) < 0) return 0; result = pst.psp_iticksperclktick; result *= (double) sysconf(_SC_CLK_TCK); return result; } m4ri-release-20240729/bench/cpucycles-20060326/hppapstat.h000066400000000000000000000007571465170556500224330ustar00rootroot00000000000000/* cpucycles hppapstat.h version 20060319 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_hppapstat_h #define CPUCYCLES_hppapstat_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_hppapstat(void); extern long long cpucycles_hppapstat_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "hppapstat" #define cpucycles cpucycles_hppapstat #define cpucycles_persecond cpucycles_hppapstat_persecond #endif #endif m4ri-release-20240729/bench/cpucycles-20060326/powerpcaix.c000066400000000000000000000030351465170556500225730ustar00rootroot00000000000000#include #include #include #include #include static long myround(double u) { long result = u; while (result + 0.5 < u) result += 1; while (result - 0.5 > u) result -= 1; return result; } static long long microseconds(void) { struct timeval t; gettimeofday(&t,(struct timezone *) 0); return t.tv_sec * (long long) 1000000 + t.tv_usec; } static long long timebase(void) { unsigned long high; unsigned long low; unsigned long newhigh; unsigned long long result; asm volatile( "Lcpucycles:mftbu %0;mftb %1;mftbu %2;cmpw %0,%2;bne Lcpucycles" : "=r" (high), "=r" (low), "=r" (newhigh) ); result = high; result <<= 32; result |= low; return result; } static double cpufrequency = 0; static long tbcycles = 0; static void init(void) { FILE *f; long long tb0; long long us0; long long tb1; long long us1; f = popen("/usr/sbin/lsattr -E -l proc0 -a frequency","r"); if (!f) return; if (fscanf(f,"frequency %lf",&cpufrequency) < 1) cpufrequency = 0; pclose(f); if (!cpufrequency) return; tb0 = timebase(); us0 = microseconds(); do { tb1 = timebase(); us1 = microseconds(); } while (us1 - us0 < 10000); if (tb1 <= tb0) return; tb1 -= tb0; us1 -= us0; tbcycles = myround((cpufrequency * 0.000001 * (double) us1) / (double) tb1); } long long cpucycles_powerpcaix(void) { if (!tbcycles) init(); return timebase() * tbcycles; } long long cpucycles_powerpcaix_persecond(void) { if (!tbcycles) init(); return cpufrequency; } 
m4ri-release-20240729/bench/cpucycles-20060326/powerpcaix.h000066400000000000000000000007671465170556500226110ustar00rootroot00000000000000/* cpucycles powerpcaix.h version 20060318 D. J. Bernstein Public domain. */ #ifndef CPUCYCLES_powerpcaix_h #define CPUCYCLES_powerpcaix_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_powerpcaix(void); extern long long cpucycles_powerpcaix_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "powerpcaix" #define cpucycles cpucycles_powerpcaix #define cpucycles_persecond cpucycles_powerpcaix_persecond #endif #endif m4ri-release-20240729/bench/cpucycles-20060326/powerpclinux.c000066400000000000000000000032341465170556500231520ustar00rootroot00000000000000#include #include #include #include #include

/* Round u to the nearest long: truncate, then step by one until within 0.5 of u. */
static long myround(double u)
{
  long result = u;
  while (result + 0.5 < u) result += 1;
  while (result - 0.5 > u) result -= 1;
  return result;
}

/* Current gettimeofday() reading expressed as a single count of microseconds. */
static long long microseconds(void)
{
  struct timeval t;
  gettimeofday(&t,(struct timezone *) 0);
  return t.tv_sec * (long long) 1000000 + t.tv_usec;
}

/*
 * Read the time base as a 64-bit value assembled from an upper word (mftbu)
 * and a lower word (mftb). The upper word is read a second time afterwards
 * and the whole sequence is retried when the two upper readings differ.
 *
 * NOTE(review): "Lcpucycles" is a named assembler label; if the compiler
 * emits this asm statement more than once (for example by inlining this
 * function) the assembler may see a duplicate definition. Consider a
 * numeric local label -- confirm against the supported toolchains.
 */
static long long timebase(void)
{
  unsigned long high;
  unsigned long low;
  unsigned long newhigh;
  unsigned long long result;
  asm volatile(
    "Lcpucycles:mftbu %0;mftb %1;mftbu %2;cmpw %0,%2;bne Lcpucycles"
    : "=r" (high), "=r" (low), "=r" (newhigh)
  );
  result = high;
  result <<= 32;
  result |= low;
  return result;
}

/* Cycles per second; stays 0 until init() has parsed it from /proc/cpuinfo. */
static double cpufrequency = 0;
/* Cycles per time-base tick; stays 0 until init() has calibrated it. */
static long tbcycles = 0;

/*
 * Calibrate cpufrequency and tbcycles.
 *
 * Scans /proc/cpuinfo for the first line matching " clock : <value> MHz",
 * converts the value to Hz, then samples timebase() against microseconds()
 * for at least 10000 microseconds and stores the rounded number of cycles
 * per time-base tick in tbcycles.
 *
 * Every failure path returns early and leaves tbcycles at 0, so the public
 * entry points below call init() again on their next invocation.
 */
static void init(void)
{
  FILE *f;
  int s;
  long long tb0;
  long long us0;
  long long tb1;
  long long us1;

  f = fopen("/proc/cpuinfo","r");
  /* Fixed: this was `return 0;`, which is not allowed in a void function. */
  if (!f) return;

  for (;;) {
    s = fscanf(f," clock : %lf MHz",&cpufrequency);
    if (s > 0) break;
    /* No match on this line: discard the rest of it and try the next one. */
    if (s == 0) s = fscanf(f,"%*[^\n]\n");
    /* End of file or read error: report "unknown" as 0. */
    if (s < 0) { cpufrequency = 0; break; }
  }
  fclose(f);
  if (!cpufrequency) return;
  cpufrequency *= 1000000.0;

  tb0 = timebase();
  us0 = microseconds();
  do {
    tb1 = timebase();
    us1 = microseconds();
  } while (us1 - us0 < 10000);
  /* A time base that did not advance cannot be used as a divisor. */
  if (tb1 <= tb0) return;

  tb1 -= tb0;
  us1 -= us0;
  tbcycles = myround((cpufrequency * 0.000001 * (double) us1) / (double) tb1);
}

/* Time-base reading scaled by tbcycles; runs init() first while tbcycles is 0. */
long long cpucycles_powerpclinux(void)
{
  if (!tbcycles) init();
  return timebase() * tbcycles;
}

/* cpufrequency as parsed by init(); runs init() first while tbcycles is 0. */
long long cpucycles_powerpclinux_persecond(void)
{
  if (!tbcycles) init();
  return cpufrequency;
}
m4ri-release-20240729/bench/cpucycles-20060326/powerpclinux.h000066400000000000000000000010071465170556500231530ustar00rootroot00000000000000/* cpucycles powerpclinux.h version 20060319 D. J. Bernstein Public domain. */ #ifndef CPUCYCLES_powerpclinux_h #define CPUCYCLES_powerpclinux_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_powerpclinux(void); extern long long cpucycles_powerpclinux_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "powerpclinux" #define cpucycles cpucycles_powerpclinux #define cpucycles_persecond cpucycles_powerpclinux_persecond #endif #endif m4ri-release-20240729/bench/cpucycles-20060326/powerpcmacos.c000066400000000000000000000017521465170556500231200ustar00rootroot00000000000000#include #include #include #define timebase mach_absolute_time static int cpumib[2] = { CTL_HW, HW_CPU_FREQ } ; static int tbmib[2] = { CTL_HW, HW_TB_FREQ } ; static long myround(double u) { long result = u; while (result + 0.5 < u) result += 1; while (result - 0.5 > u) result -= 1; return result; } static long tbcycles = 0; static void init(void) { int cpufrequency = 0; size_t cpufrequencylen = sizeof(int); int tbfrequency = 0; size_t tbfrequencylen = sizeof(int); sysctl(cpumib,2,&cpufrequency,&cpufrequencylen,0,0); sysctl(tbmib,2,&tbfrequency,&tbfrequencylen,0,0); if (tbfrequency > 0) tbcycles = myround((double) cpufrequency / (double) tbfrequency); } long long cpucycles_powerpcmacos(void) { if (!tbcycles) init(); return timebase() * tbcycles; } long long cpucycles_powerpcmacos_persecond(void) { int result = 0; size_t resultlen = sizeof(int); sysctl(cpumib,2,&result,&resultlen,0,0); return result; } 
m4ri-release-20240729/bench/cpucycles-20060326/powerpcmacos.h000066400000000000000000000010071465170556500231160ustar00rootroot00000000000000/* cpucycles powerpcmacos.h version 20060319 D. J. Bernstein Public domain. */ #ifndef CPUCYCLES_powerpcmacos_h #define CPUCYCLES_powerpcmacos_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_powerpcmacos(void); extern long long cpucycles_powerpcmacos_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "powerpcmacos" #define cpucycles cpucycles_powerpcmacos #define cpucycles_persecond cpucycles_powerpcmacos_persecond #endif #endif m4ri-release-20240729/bench/cpucycles-20060326/sparc32psrinfo.c000066400000000000000000000011611465170556500232660ustar00rootroot00000000000000#include #include long long cpucycles_sparc32psrinfo(void) { long long result; asm volatile( ".word 0x93410000;.word 0x91327020;mov %%g0,%0" : "=r" (result) : : "%g0" ); return result; } long long cpucycles_sparc32psrinfo_persecond(void) { FILE *f; double result; int s; f = popen("/usr/sbin/psrinfo -v","r"); if (!f) return 0; for (;;) { s = fscanf(f," The %*s processor operates at %lf MHz",&result); if (s > 0) break; if (s == 0) s = fscanf(f,"%*[^\n]\n"); if (s < 0) { result = 0; break; } } pclose(f); return 1000000.0 * result; } m4ri-release-20240729/bench/cpucycles-20060326/sparc32psrinfo.h000066400000000000000000000010271465170556500232740ustar00rootroot00000000000000/* cpucycles sparc32psrinfo.h version 20060319 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_sparc32psrinfo_h #define CPUCYCLES_sparc32psrinfo_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_sparc32psrinfo(void); extern long long cpucycles_sparc32psrinfo_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "sparc32psrinfo" #define cpucycles cpucycles_sparc32psrinfo #define cpucycles_persecond cpucycles_sparc32psrinfo_persecond #endif #endif m4ri-release-20240729/bench/cpucycles-20060326/sparcpsrinfo.c000066400000000000000000000010541465170556500231220ustar00rootroot00000000000000#include #include long long cpucycles_sparcpsrinfo(void) { long long result; asm volatile("rd %%tick,%0" : "=r" (result)); return result; } long long cpucycles_sparcpsrinfo_persecond(void) { FILE *f; double result; int s; f = popen("/usr/sbin/psrinfo -v","r"); if (!f) return 0; for (;;) { s = fscanf(f," The %*s processor operates at %lf MHz",&result); if (s > 0) break; if (s == 0) s = fscanf(f,"%*[^\n]\n"); if (s < 0) { result = 0; break; } } pclose(f); return 1000000.0 * result; } m4ri-release-20240729/bench/cpucycles-20060326/sparcpsrinfo.h000066400000000000000000000010071465170556500231250ustar00rootroot00000000000000/* cpucycles sparcpsrinfo.h version 20060318 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_sparcpsrinfo_h #define CPUCYCLES_sparcpsrinfo_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_sparcpsrinfo(void); extern long long cpucycles_sparcpsrinfo_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "sparcpsrinfo" #define cpucycles cpucycles_sparcpsrinfo #define cpucycles_persecond cpucycles_sparcpsrinfo_persecond #endif #endif m4ri-release-20240729/bench/cpucycles-20060326/test.c000066400000000000000000000026631465170556500213770ustar00rootroot00000000000000#include #include #include #include #include "cpucycles-impl.h" static long long tod(void) { struct timeval t; gettimeofday(&t,(struct timezone *) 0); return t.tv_sec * (long long) 1000000 + t.tv_usec; } long long todstart; long long todend; long long cpustart; long long cpuend; long long t[1001]; main() { int i; for (i = 0;i <= 1000;++i) t[i] = cpucycles(); for (i = 0;i < 1000;++i) if (t[i] > t[i + 1]) { fprintf(stderr,"t[%d] = %lld\n",i,t[i]); fprintf(stderr,"t[%d] = %lld\n",i + 1,t[i + 1]); fprintf(stderr,"cpucycles_persecond() = %lld\n",cpucycles_persecond()); return 100; } if (t[0] == t[1000]) { fprintf(stderr,"t[%d] = %lld\n",0,t[0]); fprintf(stderr,"t[%d] = %lld\n",1000,t[1000]); fprintf(stderr,"cpucycles_persecond() = %lld\n",cpucycles_persecond()); return 100; } if (cpucycles_persecond() <= 0) { fprintf(stderr,"cpucycles_persecond() = %lld\n",cpucycles_persecond()); return 100; } todstart = tod(); cpustart = cpucycles(); sleep(1); todend = tod(); cpuend = cpucycles(); todend -= todstart; cpuend -= cpustart; for (i = 0;i <= 1000;++i) t[i] = cpucycles(); printf("%s",cpucycles_implementation); printf(" %lld",cpucycles_persecond()); printf(" %lld",(long long) (((double) cpuend) * 1000000.0 / (double) todend)); for (i = 0;i < 64;++i) printf(" %lld",t[i + 1] - t[i]); printf("\n"); return 0; } 
m4ri-release-20240729/bench/cpucycles-20060326/x86cpuinfo.c000066400000000000000000000010151465170556500224170ustar00rootroot00000000000000#include #include long long cpucycles_x86cpuinfo(void) { long long result; asm volatile(".byte 15;.byte 49" : "=A" (result)); return result; } long long cpucycles_x86cpuinfo_persecond(void) { FILE *f; double result; int s; f = fopen("/proc/cpuinfo","r"); if (!f) return 0; for (;;) { s = fscanf(f,"cpu MHz : %lf",&result); if (s > 0) break; if (s == 0) s = fscanf(f,"%*[^\n]\n"); if (s < 0) { result = 0; break; } } fclose(f); return 1000000.0 * result; } m4ri-release-20240729/bench/cpucycles-20060326/x86cpuinfo.h000066400000000000000000000007671465170556500224410ustar00rootroot00000000000000/* cpucycles x86cpuinfo.h version 20060318 D. J. Bernstein Public domain. */ #ifndef CPUCYCLES_x86cpuinfo_h #define CPUCYCLES_x86cpuinfo_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_x86cpuinfo(void); extern long long cpucycles_x86cpuinfo_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "x86cpuinfo" #define cpucycles cpucycles_x86cpuinfo #define cpucycles_persecond cpucycles_x86cpuinfo_persecond #endif #endif m4ri-release-20240729/bench/cpucycles-20060326/x86tscfreq.c000066400000000000000000000005511465170556500224270ustar00rootroot00000000000000#include #include long long cpucycles_x86tscfreq(void) { long long result; asm volatile(".byte 15;.byte 49" : "=A" (result)); return result; } long long cpucycles_x86tscfreq_persecond(void) { long result = 0; size_t resultlen = sizeof(long); sysctlbyname("machdep.tsc_freq",&result,&resultlen,0,0); return result; } m4ri-release-20240729/bench/cpucycles-20060326/x86tscfreq.h000066400000000000000000000007671465170556500224450ustar00rootroot00000000000000/* cpucycles x86tscfreq.h version 20060318 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_x86tscfreq_h #define CPUCYCLES_x86tscfreq_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_x86tscfreq(void); extern long long cpucycles_x86tscfreq_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "x86tscfreq" #define cpucycles cpucycles_x86tscfreq #define cpucycles_persecond cpucycles_x86tscfreq_persecond #endif #endif m4ri-release-20240729/bitbucket-pipelines.yml000066400000000000000000000005061465170556500207620ustar00rootroot00000000000000image: gcc:10 pipelines: default: - step: script: - autoreconf -i # https://github.com/google/sanitizers/issues/916 - ASAN_OPTIONS=detect_leaks=0 CFLAGS="-fsanitize=address" ./configure --enable-debug - ASAN_OPTIONS=detect_leaks=0 make check - make distcheck m4ri-release-20240729/configure.ac000066400000000000000000000171071465170556500165700ustar00rootroot00000000000000AC_INIT([m4ri],[20240729]) AC_CANONICAL_HOST AC_CONFIG_SRCDIR(m4ri/brilliantrussian.c) AM_INIT_AUTOMAKE dnl Include maintainer mode targets. AM_MAINTAINER_MODE dnl Needed when reconfiguring with 'autoreconf -i -s' AC_CONFIG_MACRO_DIR([m4]) dnl Compiling with per-target flags (test_elimination.c) requires AM_PROG_CC_C_O. AM_PROG_CC_C_O LT_INIT AC_PROG_INSTALL AC_CONFIG_HEADERS(m4ri/config.h) AC_PROG_CC if test "$ac_cv_prog_cc_c99" = "no"; then AC_MSG_ERROR([C99 support is required but not found.]) fi # Find and set LIBM on the platform. 
LT_LIB_M AC_SUBST(LIBM) # SSE2 support AC_ARG_ENABLE([sse2], AS_HELP_STRING([--disable-sse2], [don't use SSE2 instruction set.]), , [if test "$m4ri_wrapword" = "yes"; then enable_sse2="no"; else enable_sse2="yes"; fi]) AS_IF([test "x$enable_sse2" != "xno"], [ if test "$m4ri_wrapword" = "yes"; then AC_MSG_ERROR([SSE2 cannot be supported when wrapping word in a C++ class.]) fi AX_EXT() ]) if test x"$ax_cv_have_sse2_ext" = x"yes"; then M4RI_HAVE_SSE2=1 else M4RI_HAVE_SSE2=0 fi AC_SUBST(M4RI_HAVE_SSE2) AC_ARG_WITH(papi, AS_HELP_STRING([--with-papi@<:@=PATH@:>@], [The PAPI install prefix, if configure can't find it.]), [m4ri_config_papi=$withval]) AC_ARG_WITH(cachesize, AS_HELP_STRING([--with-cachesize@<:@=VALUE@:>@], [L1,L2 and L3 cache sizes in bytes, separated by a colon. Overrides cache tuning.]),[m4ri_config_cachesize=$withval]) AC_CHECK_HEADER([mm_malloc.h],AC_DEFINE(HAVE_MM_MALLOC,,[Support aligned allocations]),) if test "$ac_cv_header_mm_malloc_h" = "yes"; then M4RI_HAVE_MM_MALLOC=1 else M4RI_HAVE_MM_MALLOC=0 fi AC_SUBST(M4RI_HAVE_MM_MALLOC) # Correctly working posix_memalign AX_FUNC_POSIX_MEMALIGN if test "$ax_cv_func_posix_memalign_works" = "yes"; then M4RI_HAVE_POSIX_MEMALIGN=1 else M4RI_HAVE_POSIX_MEMALIGN=0 fi AC_SUBST(M4RI_HAVE_POSIX_MEMALIGN) # OpenMP support AC_ARG_ENABLE([openmp], AS_HELP_STRING( [--enable-openmp],[add support for OpenMP multicore support.])) AS_IF([test "x$enable_openmp" = "xyes"], [ AX_OPENMP() ]) AC_SUBST(OPENMP_CFLAGS) if test -n "$OPENMP_CFLAGS"; then M4RI_HAVE_OPENMP=1 else M4RI_HAVE_OPENMP=0 fi AC_SUBST(M4RI_HAVE_OPENMP) # Thread-Safety M4RI_ENABLE_MZD_CACHE=1 M4RI_ENABLE_MMC=1 AC_ARG_ENABLE([thread-safe], AS_HELP_STRING([--enable-thread-safe], [make library thread safe.])) if test "x$enable_thread_safe" = "xyes"; then M4RI_ENABLE_MMC=0 M4RI_ENABLE_MZD_CACHE=0 fi if test $M4RI_HAVE_OPENMP = 1; then M4RI_ENABLE_MZD_CACHE=0 fi AC_SUBST(M4RI_ENABLE_MZD_CACHE) AC_SUBST(M4RI_ENABLE_MMC) # Debugging support AC_ARG_ENABLE([debug], 
AS_HELP_STRING([--enable-debug], [enable assert() statements for debugging.])) AC_ARG_ENABLE([debug-dump], AS_HELP_STRING([--enable-debug-dump], [dump output at exit of every function.])) if test "x$enable_debug_dump" = "xyes"; then M4RI_DEBUG_DUMP=1 else M4RI_DEBUG_DUMP=0 fi AC_SUBST(M4RI_DEBUG_DUMP) AC_ARG_ENABLE([debug-mzd], AS_HELP_STRING([--enable-debug-mzd], [add consistency checks on matrix structures.])) if test "x$enable_debug_mzd" = "xyes"; then M4RI_DEBUG_MZD=1 else M4RI_DEBUG_MZD=0 fi AC_SUBST(M4RI_DEBUG_MZD) if test "x$enable_debug" = x"yes"; then DEBUG_FLAGS="-g" AC_SUBST(DEBUG_FLAGS) else if test "x$enable_debug_mzd" != "xyes"; then AC_DEFINE(NDEBUG,1,[define whether debugging is enabled]) fi fi # For tests. Detect if PAPI is installed. See http://icl.cs.utk.edu/papi/ . if test -z "$m4ri_config_papi"; then AC_CHECK_LIB(papi, PAPI_start_counters, [ AX_GUESS_PATH_LIB(papi) AX_GUESS_PATH_HEADER(papi.h) if test -n "$LIBPAPI_PATH"; then PAPI_LDFLAGS="-Wl,-rpath,$LIBPAPI_PATH" PAPI_LIBS="-L$LIBPAPI_PATH -lpapi" else PAPI_LIBS="-lpapi" if ! test -e "/usr/lib/libpapi.so"; then AC_MSG_WARN([Could not find libpapi.so. 
Use --with-papi= or set LD_LIBRARY_PATH correctly before running benchmark applications.]) fi fi if test -n "$PAPI_H_PATH"; then PAPI_CFLAGS="-I$PAPI_H_PATH" AC_DEFINE_UNQUOTED([HAVE_LIBPAPI], 1, [Define when libpapi is available.]) else AC_MSG_WARN([Could not find papi.h; Use --with-papi= or add -I/include to either CPPFLAGS or CFLAGS, or turn off papi all together by configuring with --without-papi.]) fi ]) fi if test x"$m4ri_config_papi" != x"no" && test -n "$m4ri_config_papi"; then LIBPAPI_PATH="`realpath -s $m4ri_config_papi/lib`" PAPI_H_PATH="`realpath -s $m4ri_config_papi/include`" PAPI_CFLAGS="-I$PAPI_H_PATH" PAPI_LDFLAGS="-Wl,-rpath,$LIBPAPI_PATH" PAPI_LIBS="-L$LIBPAPI_PATH -lpapi" AC_DEFINE_UNQUOTED([HAVE_LIBPAPI], 1, [Define when libpapi is available.]) fi AC_SUBST(PAPI_LIBS) AC_SUBST(PAPI_LDFLAGS) AC_SUBST(PAPI_CFLAGS) AC_ARG_ENABLE([cachetune], AS_HELP_STRING([--enable-cachetune],[calculate cache size from timing information (deprecated).])) # Cache Sizes if test -z $m4ri_config_cachesize; then AX_CACHE_SIZE() AS_IF([test "x$enable_cachetune" = "xyes"], [AC_MSG_WARN(--enable-cachetune is deprecated since it usually does not provide optimal parameters.) 
AX_CACHE_SIZE_TUNE()]) else AS_IF([test "x$enable_cachetune" = "xyes"], [AC_MSG_WARN(Ignoring cache tuning since --with-cachesize was given.)]) ax_l1_size=`echo $m4ri_config_cachesize | cut -d ":" -f 1` ax_l2_size=`echo $m4ri_config_cachesize | cut -d ":" -f 2` ax_l3_size=`echo $m4ri_config_cachesize | cut -d ":" -f 3` M4RI_CPU_L1_CACHE=${ax_l1_size} M4RI_CPU_L2_CACHE=${ax_l2_size} M4RI_CPU_L3_CACHE=${ax_l3_size} AC_SUBST(M4RI_CPU_L1_CACHE) AC_SUBST(M4RI_CPU_L2_CACHE) AC_SUBST(M4RI_CPU_L3_CACHE) fi # PNG have_libpng="no" AC_ARG_ENABLE([png], [AS_HELP_STRING([--disable-png],[disable PNG support @<:@default=enabled@:>@])], [ if test "x${enableval}" = "xyes" ; then want_png="yes" else want_png="no" fi ], [want_png="yes"]) AC_MSG_CHECKING([whether to build with PNG support]) AC_MSG_RESULT([${want_png}]) if test "x${want_png}" = "xyes" ; then PKG_CHECK_MODULES([PNG], [libpng], [have_libpng="yes"; LIBPNG_LIBADD=${PNG_LIBS}; LIBPNG_CFLAGS=${PNG_CFLAGS}; M4RI_USE_PNG_PC=libpng], [have_libpng="no"]) if ! test "x${have_libpng}" = "xyes" ; then AC_CHECK_LIB([png], [png_create_write_struct], [have_libpng="yes"; LIBPNG_LIBADD="-lpng"], [AC_CHECK_LIB([png14], [png_create_write_struct], [have_libpng="yes"; LIBPNG_LIBADD="-lpng14"], [AC_CHECK_LIB([png12], [png_create_write_struct], [have_libpng="yes"; LIBPNG_LIBADD="-lpng12"], [AC_CHECK_LIB([png10], [png_create_write_struct], [have_libpng="yes"; LIBPNG_LIBADD="-lpng10"], [have_libpng="no"]) ]) ]) ]) RAW_LIBPNG=${LIBPNG_LIBADD} fi if test "x${have_libpng}" = "xno" ; then AC_MSG_WARN([Can not find a usuable PNG library. 
Make sure that CPPFLAGS and LDFLAGS are correctly set.]) fi fi if test "x${have_libpng}" = "xyes" ; then M4RI_HAVE_LIBPNG=1 AC_SUBST(M4RI_HAVE_LIBPNG) AC_SUBST(LIBPNG_LIBADD) AC_SUBST(LIBPNG_CFLAGS) AC_SUBST(M4RI_USE_PNG_PC) AC_SUBST(RAW_LIBPNG) else M4RI_HAVE_LIBPNG=0 AC_SUBST(M4RI_HAVE_LIBPNG) fi M4RI_BUILDING_M4RI=1 AC_DEFINE([M4RI_BUILDING_M4RI], [1], [Define to indicate that m4ri is being built instead of being used]) RELEASE="AC_PACKAGE_VERSION" AC_SUBST(RELEASE) AC_PROG_MAKE_SET AC_CONFIG_FILES([Makefile tests/Makefile bench/Makefile m4ri/m4ri_config.h m4ri.pc]) AC_OUTPUT m4ri-release-20240729/m4/000077500000000000000000000000001465170556500146145ustar00rootroot00000000000000m4ri-release-20240729/m4/ax_cache_size.m4000066400000000000000000000107201465170556500176430ustar00rootroot00000000000000# =========================================================================== # http://autoconf-archive.cryp.to/ax_cache_size.html # =========================================================================== # # SYNOPSIS # # AX_CACHE_SIZE # # DESCRIPTION # # Find L1 and L2 caches size by reading the corresponding file on UNIX or # by requesting cpuid. The results are available in the substituted variables # M4RI_CPU_L1_CACHE and M4RI_CPU_L2_CACHE. # # This macro depends on AX_GCC_X86_CPUID, AC_PROG_SED, and AX_CPU_VENDOR. # # LAST MODIFICATION # # 2011-04-11 # # COPYLEFT # # Copyright (c) 2008 Christophe Tournayre # # Patched by: # # Copyright (c) 2008 Martin Albrecht # Copyright (c) 2008 Arnaud Bergeron # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. 
AC_DEFUN([AX_CACHE_SIZE], [ AC_REQUIRE([AC_PROG_SED]) AC_REQUIRE([AX_GCC_X86_CPUID]) AC_REQUIRE([AX_CPU_VENDOR]) AX_CPU_VENDOR ax_l1_size= ax_l2_size= #Check if the variable is present if test -e /sys/devices/system/cpu/cpu0/cache/index0/size; then for idx in `seq 0 3`; do if test -e /sys/devices/system/cpu/cpu0/cache/index$idx/size ; then level=`cat /sys/devices/system/cpu/cpu0/cache/index$idx/level` size=`cat /sys/devices/system/cpu/cpu0/cache/index$idx/size` eval CPU0\_L$level\_CACHE="$size" fi done ax_l1_size=$CPU0_L1_CACHE ax_l2_size=$CPU0_L2_CACHE ax_l3_size=$CPU0_L3_CACHE else if test "x$ax_cv_cpu_vendor" != "xUnknown"; then #Or use CPUID AX_GCC_X86_CPUID(0x80000000) cpu_exthigh=`echo $ax_cv_gcc_x86_cpuid_0x80000000 | cut -d ":" -f 1` if test "x$cpu_exthi" > "x80000004"; then AX_GCC_X86_CPUID(0x80000005) # For L1 cache l1_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000005 | cut -d ":" -f 4` ax_l1_size=$((0x$l1_hexval >> 24)) fi if test "x$cpu_exthi" > "x80000005"; then AX_GCC_X86_CPUID(0x80000006) # For L2 cache l2_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000006 | cut -d ":" -f 3` ax_l2_size=$((0x$l2_hexval >> 16)) fi if test "x$cpu_exthi" > "x80000005"; then AX_GCC_X86_CPUID(0x80000006) # For L3 cache l2_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000006 | cut -d ":" -f 4` ax_l2_size=$((0x$l2_hexval >> 18))*512 fi fi #Or use sysctl sysctl_exe= if test -x /usr/sbin/sysctl ; then sysctl_exe=/usr/sbin/sysctl elif test -x /sbin/sysctl ; then sysctl_exe=/sbin/sysctl fi if test -n "$sysctl_exe"; then if test -z "$ax_l2_size" -o "$ax_l2_size" = "0"; then sysctl_out=`$sysctl_exe -n hw.l2cachesize 2>/dev/null`; if test ! -z "$sysctl_out"; then ax_l2_size=$(($sysctl_out / 1024)) fi; fi if test -z "$ax_l1_size" -o "$ax_l1_size" = "0" ; then sysctl_out=`$sysctl_exe -n hw.l1dcachesize 2>/dev/null`; if test ! 
-z "$sysctl_out"; then ax_l1_size=$(($sysctl_out / 1024)) fi; fi if test -z "$ax_l1_size" -o "ax_l1_size" = "0" ; then sysctl_out=`$sysctl_exe -n hw.l1cachesize 2>/dev/null`; if test ! -z "$sysctl_out"; then ax_l1_size=$(($sysctl_out / 1024)) fi; fi fi fi test -z "$ax_l1_size" && ax_l1_size=0 test -z "$ax_l2_size" && ax_l2_size=0 test -z "$ax_l3_size" && ax_l3_size=$ax_l2_size # Keep only digits if there is a unit (ie 1024K -> 1024) and convert in Bytes AC_MSG_CHECKING(the L1 cache size) ax_l1_size=`echo $ax_l1_size | $SED 's/\([[0-9]]\)[[A-Za-z]]$/\1/g'` ax_l1_size=$(($ax_l1_size*1024)) AC_MSG_RESULT( $ax_l1_size Bytes) AC_MSG_CHECKING(the L2 cache size) ax_l2_size=`echo $ax_l2_size | $SED 's/\([[0-9]]\)[[A-Za-z]]$/\1/g'` ax_l2_size=$(($ax_l2_size*1024)) AC_MSG_RESULT( $ax_l2_size Bytes) AC_MSG_CHECKING(the L3 cache size) ax_l3_size=`echo $ax_l3_size | $SED 's/\([[0-9]]\)[[A-Za-z]]$/\1/g'` ax_l3_size=$(($ax_l3_size*1024)) AC_MSG_RESULT( $ax_l3_size Bytes) M4RI_CPU_L1_CACHE=${ax_l1_size} M4RI_CPU_L2_CACHE=${ax_l2_size} M4RI_CPU_L3_CACHE=${ax_l3_size} AC_SUBST(M4RI_CPU_L1_CACHE) AC_SUBST(M4RI_CPU_L2_CACHE) AC_SUBST(M4RI_CPU_L3_CACHE) ]) m4ri-release-20240729/m4/ax_cache_size_tune.m4000066400000000000000000000121141465170556500206750ustar00rootroot00000000000000# SYNOPSIS # # AX_CACHE_SIZE_TUNE # # DESCRIPTION # # Find L1, L2, L3 caches size by running some timing experiments. # The results are available in the defines __M4RI_CPU_L1_CACHE, # __M4RI_CPU_L2_CACHE and __M4RI_CPU_L3_CACHE. # # This macro depends on AC_PROG_SED, AC_PROG_CC. # # LAST MODIFICATION # # 2011-04-11 # # COPYLEFT # # Copyright (c) 2009,2010 Martin Albrecht # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. 
AC_DEFUN([AX_CACHE_SIZE_TUNE], [ AC_REQUIRE([AC_PROG_CC]) AC_REQUIRE([AC_PROG_SED]) AC_LANG_PUSH([C]) AC_CACHE_CHECK(for cache sizes, ax_cv_cache_sizes, [AC_RUN_IFELSE([AC_LANG_PROGRAM([[ #include #include #include #include double walltime(double t0) { double mic, time; double mega = 0.000001; struct timeval tp; static long base_sec = 0; static long base_usec = 0; (void) gettimeofday(&tp,NULL); if (base_sec == 0) { base_sec = tp.tv_sec; base_usec = tp.tv_usec; } time = (double) (tp.tv_sec - base_sec); mic = (double) (tp.tv_usec - base_usec); time = (time + mic * mega) - t0; return(time); } double run_experiment(size_t size, size_t trials) { size_t i,j; unsigned long *a = (unsigned long*)malloc(size/4); unsigned long *b = (unsigned long*)malloc(size/4); unsigned long *c = (unsigned long*)malloc(size/4); unsigned long *d = (unsigned long*)malloc(size/4); size_t n = size/4/(sizeof(unsigned long)); /* we setup a lookup table with a random-ish pattern */ a[0] = 1337; b[0] = 5345345; for(j=1; j 0.25) { _trials = _trials/2; mult = 2*mult; wt /= 2.0; result /= 2.0; } } printf("\n"); } for(i=0;i dtimes[0][max] ) { max = i; } } return candidates[max-1]; } ]], [[ const size_t c1[] = { 4, 8, 16, 32, 64, 128}; const size_t c2[] = { 128, 256, 512}; const size_t c3[] = {1024,1536,2048,3072,4096,6144,8192,16384,32768}; FILE *f; printf("\n"); size_t _l1 = cache_size(c1, 6, 1ULL<<15); size_t _l2 = cache_size(c2, 3, 1ULL<<12); size_t _l3 = cache_size(c3, 9, 1ULL<< 9); f = fopen("conftest_cache_sizes", "w"); if (!f) return 1; fprintf(f,"%lu:%lu:%lu\n",(unsigned long)(_l1*1024),(unsigned long)(_l2*1024),(unsigned long)(_l3*1024)); fclose(f); return 0; ]])], [ax_cv_cache_sizes=`cat conftest_cache_sizes`; rm -f conftest_cache_sizes], [ax_cv_cache_sizes=unknown; rm -f conftest_cache_sizes], [ax_cv_cache_sizes=unknown])]) AC_LANG_POP([C]) AC_MSG_CHECKING(the L1 cache size) ax_l1_size=`echo $ax_cv_cache_sizes | cut -d ':' -f 1` AC_MSG_RESULT( $ax_l1_size Bytes) AC_MSG_CHECKING(the L2 cache 
size) ax_l2_size=`echo $ax_cv_cache_sizes | cut -d ':' -f 2` AC_MSG_RESULT( $ax_l2_size Bytes) AC_MSG_CHECKING(the L3 cache size) ax_l3_size=`echo $ax_cv_cache_sizes | cut -d ':' -f 3` AC_MSG_RESULT( $ax_l3_size Bytes) M4RI_CPU_L1_CACHE=${ax_l1_size} M4RI_CPU_L2_CACHE=${ax_l2_size} M4RI_CPU_L3_CACHE=${ax_l3_size} AC_SUBST(M4RI_CPU_L1_CACHE) AC_SUBST(M4RI_CPU_L2_CACHE) AC_SUBST(M4RI_CPU_L3_CACHE) ]) m4ri-release-20240729/m4/ax_cpu_vendor.m4000066400000000000000000000027121465170556500177140ustar00rootroot00000000000000# =========================================================================== # http://autoconf-archive.cryp.to/ax_cpu_vendor.html # =========================================================================== # # SYNOPSIS # # AX_CPU_VENDOR # # DESCRIPTION # # Find your CPU's vendor by requesting cpuid and define "ax_cv_cpu_vendor" # accordingly. This macro depends on AX_GCC_X86_CPUID. # # LAST MODIFICATION # # 2008-04-12 # # COPYLEFT # # Copyright (c) 2008 Christophe Tournayre # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. 
AC_DEFUN([AX_CPU_VENDOR], [ AC_REQUIRE([AX_GCC_X86_CPUID]) AX_GCC_X86_CPUID(0x0) AC_CACHE_CHECK(for the processor vendor, ax_cv_cpu_vendor, [ vendor=`echo $ax_cv_gcc_x86_cpuid_0x0 | cut -d ":" -f 2` case $vendor in 756e6547*) ax_cv_cpu_vendor="Intel" ;; 68747541*) ax_cv_cpu_vendor="AMD" ;; 69727943*) ax_cv_cpu_vendor="Cyrix" ;; 746e6543*) ax_cv_cpu_vendor="IDT" ;; 646f6547*) ax_cv_cpu_vendor="Natsemi Geode" ;; 52697365*) ax_cv_cpu_vendor="Rise" ;; 65736952*) ax_cv_cpu_vendor="Rise" ;; 20536953*) ax_cv_cpu_vendor="SiS" ;; *) ax_cv_cpu_vendor="Unknown" ;; esac ]) ]) m4ri-release-20240729/m4/ax_func_posix_memalign.m4000066400000000000000000000030211465170556500215700ustar00rootroot00000000000000# =========================================================================== # https://www.gnu.org/software/autoconf-archive/ax_func_posix_memalign.html # =========================================================================== # # SYNOPSIS # # AX_FUNC_POSIX_MEMALIGN # # DESCRIPTION # # Some versions of posix_memalign (notably glibc 2.2.5) incorrectly apply # their power-of-two check to the size argument, not the alignment # argument. AX_FUNC_POSIX_MEMALIGN defines HAVE_POSIX_MEMALIGN if the # power-of-two check is correctly applied to the alignment argument. # # LICENSE # # Copyright (c) 2008 Scott Pakin # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. #serial 9 AC_DEFUN([AX_FUNC_POSIX_MEMALIGN], [AC_CACHE_CHECK([for working posix_memalign], [ax_cv_func_posix_memalign_works], [AC_RUN_IFELSE([AC_LANG_SOURCE([[ #include int main () { void *buffer; /* Some versions of glibc incorrectly perform the alignment check on * the size word. 
*/ exit (posix_memalign (&buffer, sizeof(void *), 123) != 0); } ]])], [ax_cv_func_posix_memalign_works=yes], [ax_cv_func_posix_memalign_works=no], [ax_cv_func_posix_memalign_works=no])]) if test "$ax_cv_func_posix_memalign_works" = "yes" ; then AC_DEFINE([HAVE_POSIX_MEMALIGN], [1], [Define to 1 if `posix_memalign' works.]) fi ]) m4ri-release-20240729/m4/ax_guess_path_header.m4000066400000000000000000000035441465170556500212260ustar00rootroot00000000000000# # SYNOPSIS # # AX_GUESS_PATH_HEADER([foo.h]) # # DESCRIPTION # # Search for header foo.h in -Ipath's found in CPPFLAGS and CFLAGS and set FOO_H_PATH to # the full directory path where foo.h was found. # If no header is found in the paths given in CPPFLAGS and CFLAGS, then lastly it looks in /usr/local/include. # # LAST MODIFICATION # # 2011-04-11 # # COPYLEFT # # Copyright (c) 2011 Carlo Wood # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. 
AC_DEFUN([AX_GUESS_PATH_HEADER], [ function cw_search_header_path { n=2 while test $n -le [$]#; do eval arg=\$"$n" case "$arg" in -I*) path="`echo "$arg" | sed -e 's/-I//'`" if test -e "$path/$1"; then echo "$path" return fi ;; esac n=$((n+1)) done if test -e "/usr/local/include/$1"; then echo "/usr/local/include" fi } have_realpath=`which realpath` cw_headername_uppercase=`echo "m4_toupper([$1])" | sed -e 's/[[^A-Z]]/_/g'` AC_CACHE_CHECK([if we can find [$1]], [cw_cv_"$[]cw_headername_uppercase"_path], [ cw_header_path=`eval cw_search_header_path [$1] $CPPFLAGS $CFLAGS` if test -n "$cw_header_path"; then if test "x$have_realpath" != "x"; then eval cw_cv_"$cw_headername_uppercase"_path=`realpath -s "$cw_header_path"` else eval cw_cv_"$cw_headername_uppercase"_path="$cw_header_path" fi else eval cw_cv_"$cw_headername_uppercase"_path="no" fi ]) if eval test \"\$cw_cv_"$cw_headername_uppercase"_path\" = "no"; then eval "$cw_headername_uppercase"_PATH="" else eval "$cw_headername_uppercase"_PATH=\"\$cw_cv_"$cw_headername_uppercase"_path\" fi ]) m4ri-release-20240729/m4/ax_guess_path_lib.m4000066400000000000000000000034431465170556500205420ustar00rootroot00000000000000# # SYNOPSIS # # AX_GUESS_PATH_LIB([foo]) # # DESCRIPTION # # Search for library foo in -Lpath's found in LDFLAGS and set LIBFOO_PATH to # the full directory path where libfoo.so was found. # If no library is found in paths given in LDFLAGS, then lastly it looks in /usr/local/lib. # # LAST MODIFICATION # # 2011-04-11 # # COPYLEFT # # Copyright (c) 2011 Carlo Wood # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. 
AC_DEFUN([AX_GUESS_PATH_LIB], [ function cw_search_library_path { n=2 while test $n -le [$]#; do eval arg=\$"$n" case "$arg" in -L*) path="`echo "$arg" | sed -e 's/-L//'`" if test -e "$path/lib$1.so"; then echo "$path" return fi ;; esac n=$((n+1)) done if test -e "/usr/local/lib/lib$1.so"; then echo "/usr/local/lib" fi } have_realpath=`which realpath` cw_libname_uppercase="m4_toupper([$1])" AC_CACHE_CHECK([if we can find lib[$1].so], [cw_cv_lib"$[]cw_libname_uppercase"_path], [ cw_library_path=`eval cw_search_library_path [$1] $LDFLAGS` if test -n "$cw_library_path"; then if test "x$have_realpath" != "x"; then eval cw_cv_lib"$cw_libname_uppercase"_path=`realpath -s "$cw_library_path"` else eval cw_cv_lib"$cw_libname_uppercase"_path="$cw_library_path" fi else eval cw_cv_lib"$cw_libname_uppercase"_path="no" fi ]) if eval test \"\$cw_cv_lib"$cw_libname_uppercase"_path\" = "no"; then eval LIB"$cw_libname_uppercase"_PATH="" else eval LIB"$cw_libname_uppercase"_PATH=\"\$cw_cv_lib"$cw_libname_uppercase"_path\" fi ]) m4ri-release-20240729/m4/m4_ax_check_compile_flag.m4000066400000000000000000000040701465170556500217250ustar00rootroot00000000000000# =========================================================================== # https://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html # =========================================================================== # # SYNOPSIS # # AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) # # DESCRIPTION # # Check whether the given FLAG works with the current language's compiler # or gives an error. (Warnings, however, are ignored) # # ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on # success/failure. # # If EXTRA-FLAGS is defined, it is added to the current language's default # flags (e.g. CFLAGS) when the check is done. The check is thus made with # the flags: "CFLAGS EXTRA-FLAGS FLAG". 
This can for example be used to # force the compiler to issue an error when a bad flag is given. # # INPUT gives an alternative input source to AC_COMPILE_IFELSE. # # NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this # macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. # # LICENSE # # Copyright (c) 2008 Guido U. Draheim # Copyright (c) 2011 Maarten Bosmans # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. #serial 6 AC_DEFUN([AX_CHECK_COMPILE_FLAG], [AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], [AS_VAR_SET(CACHEVAR,[yes])], [AS_VAR_SET(CACHEVAR,[no])]) _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) AS_VAR_IF(CACHEVAR,yes, [m4_default([$2], :)], [m4_default([$3], :)]) AS_VAR_POPDEF([CACHEVAR])dnl ])dnl AX_CHECK_COMPILE_FLAGS m4ri-release-20240729/m4/m4_ax_ext.m4000066400000000000000000000361161465170556500167550ustar00rootroot00000000000000# =========================================================================== # https://www.gnu.org/software/autoconf-archive/ax_ext.html # =========================================================================== # # SYNOPSIS # # AX_EXT # # DESCRIPTION # # Find supported SIMD extensions by requesting cpuid. When a SIMD # extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if # compiler supports it. For example, if "sse2" is available then "-msse2" # is added to SIMD_FLAGS. # # Find other supported CPU extensions by requesting cpuid. 
When a # processor extension is found, the -m"extensionname" is added to # CPUEXT_FLAGS if compiler supports it. For example, if "bmi2" is # available then "-mbmi2" is added to CPUEXT_FLAGS. # # This macro calls: # # AC_SUBST(SIMD_FLAGS) # AC_SUBST(CPUEXT_FLAGS) # # And defines: # # HAVE_RDRND / HAVE_BMI1 / HAVE_BMI2 / HAVE_ADX / HAVE_MPX # HAVE_PREFETCHWT1 / HAVE_ABM / HAVE_MMX / HAVE_SSE / HAVE_SSE2 # HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4_1 / HAVE_SSE4_2 / HAVE_SSE4a # HAVE_SHA / HAVE_AES / HAVE_AVX / HAVE_FMA3 / HAVE_FMA4 / HAVE_XOP # HAVE_AVX2 / HAVE_AVX512_F / HAVE_AVX512_CD / HAVE_AVX512_PF # HAVE_AVX512_ER / HAVE_AVX512_VL / HAVE_AVX512_BW / HAVE_AVX512_DQ # HAVE_AVX512_IFMA / HAVE_AVX512_VBMI / HAVE_ALTIVEC / HAVE_VSX # # LICENSE # # Copyright (c) 2007 Christophe Tournayre # Copyright (c) 2013,2015 Michael Petch # Copyright (c) 2017 Rafael de Lucena Valle # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. 
#serial 18

AC_DEFUN([AX_EXT],
[
  AC_REQUIRE([AC_CANONICAL_HOST])
  AC_REQUIRE([AC_PROG_CC])

  dnl Both flag lists start empty and are extended as extensions are found.
  CPUEXT_FLAGS=""
  SIMD_FLAGS=""

  case $host_cpu in
    powerpc*)
      dnl Probe via sysctl first, then via the auxiliary vector dump.
      AC_CACHE_CHECK([whether altivec is supported for old distros], [ax_cv_have_altivec_old_ext],
          [
            if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
                if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
                  ax_cv_have_altivec_old_ext=yes
                fi
            fi
          ])

      if test "$ax_cv_have_altivec_old_ext" = yes; then
        AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
        AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])
      fi

      AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
          [
            if test `LD_SHOW_AUXV=1 /bin/true 2>/dev/null|grep -c altivec` != 0; then
              ax_cv_have_altivec_ext=yes
            fi
          ])

      if test "$ax_cv_have_altivec_ext" = yes; then
        AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
        AX_CHECK_COMPILE_FLAG(-maltivec, SIMD_FLAGS="$SIMD_FLAGS -maltivec", [])
      fi

      AC_CACHE_CHECK([whether vsx is supported], [ax_cv_have_vsx_ext],
          [
            if test `LD_SHOW_AUXV=1 /bin/true 2>/dev/null|grep -c vsx` != 0; then
              ax_cv_have_vsx_ext=yes
            fi
          ])

      if test "$ax_cv_have_vsx_ext" = yes; then
        AC_DEFINE(HAVE_VSX,,[Support VSX instructions])
        AX_CHECK_COMPILE_FLAG(-mvsx, SIMD_FLAGS="$SIMD_FLAGS -mvsx", [])
      fi
    ;;

    i[[3456]]86*|x86_64*|amd64*)

      AC_REQUIRE([AX_GCC_X86_CPUID])
      AC_REQUIRE([AX_GCC_X86_CPUID_COUNT])
      AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])

      dnl The cpuid helpers cache "eax:ebx:ecx:edx" as hex fields, or "unknown".
      dnl Field 1 of leaf 0 is compared against a leaf number before that
      dnl leaf is queried; the same is done for the 0x80000000 range.
      eax_cpuid0=0
      AX_GCC_X86_CPUID(0x00000000)
      if test "$ax_cv_gcc_x86_cpuid_0x00000000" != "unknown";
      then
        eax_cpuid0=`echo $ax_cv_gcc_x86_cpuid_0x00000000 | cut -d ":" -f 1`
      fi

      eax_cpuid80000000=0
      AX_GCC_X86_CPUID(0x80000000)
      if test "$ax_cv_gcc_x86_cpuid_0x80000000" != "unknown";
      then
        eax_cpuid80000000=`echo $ax_cv_gcc_x86_cpuid_0x80000000 | cut -d ":" -f 1`
      fi

      ecx_cpuid1=0
      edx_cpuid1=0
      if test "$((0x$eax_cpuid0))" -ge 1 ; then
        AX_GCC_X86_CPUID(0x00000001)
        if test "$ax_cv_gcc_x86_cpuid_0x00000001" != "unknown";
        then
          ecx_cpuid1=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
          edx_cpuid1=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
        fi
      fi

      ebx_cpuid7=0
      ecx_cpuid7=0
      if test "$((0x$eax_cpuid0))" -ge 7 ; then
        AX_GCC_X86_CPUID_COUNT(0x00000007, 0x00)
        if test "$ax_cv_gcc_x86_cpuid_0x00000007" != "unknown";
        then
          ebx_cpuid7=`echo $ax_cv_gcc_x86_cpuid_0x00000007 | cut -d ":" -f 2`
          ecx_cpuid7=`echo $ax_cv_gcc_x86_cpuid_0x00000007 | cut -d ":" -f 3`
        fi
      fi

      ecx_cpuid80000001=0
      edx_cpuid80000001=0
      if test "$((0x$eax_cpuid80000000))" -ge "$((0x80000001))" ; then
        AX_GCC_X86_CPUID(0x80000001)
        if test "$ax_cv_gcc_x86_cpuid_0x80000001" != "unknown";
        then
          ecx_cpuid80000001=`echo $ax_cv_gcc_x86_cpuid_0x80000001 | cut -d ":" -f 3`
          edx_cpuid80000001=`echo $ax_cv_gcc_x86_cpuid_0x80000001 | cut -d ":" -f 4`
        fi
      fi

      dnl OS support classes. The first field of every table row below names
      dnl one of these ax_cv_have_CLASS_os_support_ext variables.
      AC_CACHE_VAL([ax_cv_have_mmx_os_support_ext],
      [
        ax_cv_have_mmx_os_support_ext=yes
      ])

      ax_cv_have_none_os_support_ext=yes

      AC_CACHE_VAL([ax_cv_have_sse_os_support_ext],
      [
        dnl Must be exactly "no": the table loop compares this value against
        dnl the literal strings yes and no.
        ax_cv_have_sse_os_support_ext=no
        if test "$((0x$edx_cpuid1>>25&0x01))" = 1; then
          AC_LANG_PUSH([C])
          AC_RUN_IFELSE([AC_LANG_SOURCE([[
#include <signal.h>
#include <stdlib.h>
            /* No way at ring1 to ring3 in protected mode to check the CR0 and CR4
               control registers directly. Execute an SSE instruction.
               If it raises SIGILL then OS doesn't support SSE based instructions */
            void sig_handler(int signum){ exit(1); }
            int main(){
              signal(SIGILL, sig_handler);
              /* SSE instruction xorps  %xmm0,%xmm0 */
              __asm__ __volatile__ (".byte 0x0f, 0x57, 0xc0");
              return 0;
            }]])],
            [ax_cv_have_sse_os_support_ext=yes],
            [ax_cv_have_sse_os_support_ext=no],
            [ax_cv_have_sse_os_support_ext=no])
          AC_LANG_POP([C])
        fi
      ])

      dnl AVX class: only evaluated when bit 28 of leaf 1 ecx is set; it then
      dnl needs bit 27 of the same register and bits 0x6 of the xgetbv result.
      xgetbv_eax=0
      if test "$((0x$ecx_cpuid1>>28&0x01))" = 1; then
        AX_GCC_X86_AVX_XGETBV(0x00000000)

        if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
          xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
        fi

        AC_CACHE_VAL([ax_cv_have_avx_os_support_ext],
        [
          ax_cv_have_avx_os_support_ext=no
          if test "$((0x$ecx_cpuid1>>27&0x01))" = 1; then
            if test "$((0x$xgetbv_eax&0x6))" = 6; then
              ax_cv_have_avx_os_support_ext=yes
            fi
          fi
        ])
      fi

      dnl AVX-512 class: requires the AVX class plus bits 0xe6 of xgetbv.
      AC_CACHE_VAL([ax_cv_have_avx512_os_support_ext],
      [
        ax_cv_have_avx512_os_support_ext=no
        if test "$ax_cv_have_avx_os_support_ext" = yes; then
          if test "$((0x$xgetbv_eax&0xe6))" = "$((0xe6))"; then
            ax_cv_have_avx512_os_support_ext=yes
          fi
        fi
      ])

      dnl Extension table, one row per extension, fields separated by ";":
      dnl   OS class; cache variable stem; display name; register,bit;
      dnl   compiler flag; config.h define; flag list to extend.
      dnl Every row ends in dnl so that the shell sees one single for-list;
      dnl do not insert anything between the rows.
      for ac_instr_info dnl
      in "none;rdrnd;RDRND;ecx_cpuid1,30;-mrdrnd;HAVE_RDRND;CPUEXT_FLAGS" dnl
         "none;bmi1;BMI1;ebx_cpuid7,3;-mbmi;HAVE_BMI1;CPUEXT_FLAGS" dnl
         "none;bmi2;BMI2;ebx_cpuid7,8;-mbmi2;HAVE_BMI2;CPUEXT_FLAGS" dnl
         "none;adx;ADX;ebx_cpuid7,19;-madx;HAVE_ADX;CPUEXT_FLAGS" dnl
         "none;mpx;MPX;ebx_cpuid7,14;-mmpx;HAVE_MPX;CPUEXT_FLAGS" dnl
         "none;prefetchwt1;PREFETCHWT1;ecx_cpuid7,0;-mprefetchwt1;HAVE_PREFETCHWT1;CPUEXT_FLAGS" dnl
         "none;abm;ABM;ecx_cpuid80000001,5;-mabm;HAVE_ABM;CPUEXT_FLAGS" dnl
         "mmx;mmx;MMX;edx_cpuid1,23;-mmmx;HAVE_MMX;SIMD_FLAGS" dnl
         "sse;sse;SSE;edx_cpuid1,25;-msse;HAVE_SSE;SIMD_FLAGS" dnl
         "sse;sse2;SSE2;edx_cpuid1,26;-msse2;HAVE_SSE2;SIMD_FLAGS" dnl
         "sse;sse3;SSE3;ecx_cpuid1,1;-msse3;HAVE_SSE3;SIMD_FLAGS" dnl
         "sse;ssse3;SSSE3;ecx_cpuid1,9;-mssse3;HAVE_SSSE3;SIMD_FLAGS" dnl
         "sse;sse41;SSE4.1;ecx_cpuid1,19;-msse4.1;HAVE_SSE4_1;SIMD_FLAGS" dnl
         "sse;sse42;SSE4.2;ecx_cpuid1,20;-msse4.2;HAVE_SSE4_2;SIMD_FLAGS" dnl
         "sse;sse4a;SSE4a;ecx_cpuid80000001,6;-msse4a;HAVE_SSE4a;SIMD_FLAGS" dnl
         "sse;sha;SHA;ebx_cpuid7,29;-msha;HAVE_SHA;SIMD_FLAGS" dnl
         "sse;aes;AES;ecx_cpuid1,25;-maes;HAVE_AES;SIMD_FLAGS" dnl
         "avx;avx;AVX;ecx_cpuid1,28;-mavx;HAVE_AVX;SIMD_FLAGS" dnl
         "avx;fma3;FMA3;ecx_cpuid1,12;-mfma;HAVE_FMA3;SIMD_FLAGS" dnl
         "avx;fma4;FMA4;ecx_cpuid80000001,16;-mfma4;HAVE_FMA4;SIMD_FLAGS" dnl
         "avx;xop;XOP;ecx_cpuid80000001,11;-mxop;HAVE_XOP;SIMD_FLAGS" dnl
         "avx;avx2;AVX2;ebx_cpuid7,5;-mavx2;HAVE_AVX2;SIMD_FLAGS" dnl
         "avx512;avx512f;AVX512-F;ebx_cpuid7,16;-mavx512f;HAVE_AVX512_F;SIMD_FLAGS" dnl
         "avx512;avx512cd;AVX512-CD;ebx_cpuid7,28;-mavx512cd;HAVE_AVX512_CD;SIMD_FLAGS" dnl
         "avx512;avx512pf;AVX512-PF;ebx_cpuid7,26;-mavx512pf;HAVE_AVX512_PF;SIMD_FLAGS" dnl
         "avx512;avx512er;AVX512-ER;ebx_cpuid7,27;-mavx512er;HAVE_AVX512_ER;SIMD_FLAGS" dnl
         "avx512;avx512vl;AVX512-VL;ebx_cpuid7,31;-mavx512vl;HAVE_AVX512_VL;SIMD_FLAGS" dnl
         "avx512;avx512bw;AVX512-BW;ebx_cpuid7,30;-mavx512bw;HAVE_AVX512_BW;SIMD_FLAGS" dnl
         "avx512;avx512dq;AVX512-DQ;ebx_cpuid7,17;-mavx512dq;HAVE_AVX512_DQ;SIMD_FLAGS" dnl
         "avx512;avx512ifma;AVX512-IFMA;ebx_cpuid7,21;-mavx512ifma;HAVE_AVX512_IFMA;SIMD_FLAGS" dnl
         "avx512;avx512vbmi;AVX512-VBMI;ecx_cpuid7,1;-mavx512vbmi;HAVE_AVX512_VBMI;SIMD_FLAGS" dnl
         #
      do
        ac_instr_os_support=$(eval echo \$ax_cv_have_$(echo $ac_instr_info | cut -d ";" -f 1)_os_support_ext)
        ac_instr_acvar=$(echo $ac_instr_info | cut -d ";" -f 2)
        ac_instr_shortname=$(echo $ac_instr_info | cut -d ";" -f 3)
        ac_instr_chk_loc=$(echo $ac_instr_info | cut -d ";" -f 4)
        ac_instr_chk_reg=0x$(eval echo \$$(echo $ac_instr_chk_loc | cut -d "," -f 1))
        ac_instr_chk_bit=$(echo $ac_instr_chk_loc | cut -d "," -f 2)
        ac_instr_compiler_flags=$(echo $ac_instr_info | cut -d ";" -f 5)
        ac_instr_have_define=$(echo $ac_instr_info | cut -d ";" -f 6)
        ac_instr_flag_type=$(echo $ac_instr_info | cut -d ";" -f 7)

        dnl Step 1: is the feature bit set in the saved cpuid register?
        AC_CACHE_CHECK([whether ${ac_instr_shortname} is supported by the processor], [ax_cv_have_${ac_instr_acvar}_cpu_ext],
        [
          eval ax_cv_have_${ac_instr_acvar}_cpu_ext=no
          if test "$((${ac_instr_chk_reg}>>${ac_instr_chk_bit}&0x01))" = 1 ; then
            eval ax_cv_have_${ac_instr_acvar}_cpu_ext=yes
          fi
        ])

        if test x"$(eval echo \$ax_cv_have_${ac_instr_acvar}_cpu_ext)" = x"yes"; then
          dnl Step 2: does the OS class of this row report support?
          AC_CACHE_CHECK([whether ${ac_instr_shortname} is supported by the processor and OS], [ax_cv_have_${ac_instr_acvar}_ext],
          [
            eval ax_cv_have_${ac_instr_acvar}_ext=no
            if test x"${ac_instr_os_support}" = x"yes"; then
              eval ax_cv_have_${ac_instr_acvar}_ext=yes
            fi
          ])

          if test "$(eval echo \$ax_cv_have_${ac_instr_acvar}_ext)" = yes; then
            dnl Step 3: does the compiler accept the flag? If so record the
            dnl flag in the list named by the row and emit the define.
            AX_CHECK_COMPILE_FLAG(${ac_instr_compiler_flags}, eval ax_cv_support_${ac_instr_acvar}_ext=yes,
                                                              eval ax_cv_support_${ac_instr_acvar}_ext=no)
            if test x"$(eval echo \$ax_cv_support_${ac_instr_acvar}_ext)" = x"yes"; then
              eval ${ac_instr_flag_type}=\"\$${ac_instr_flag_type} ${ac_instr_compiler_flags}\"
              AC_DEFINE_UNQUOTED([${ac_instr_have_define}])
            else
              AC_MSG_WARN([Your processor and OS supports ${ac_instr_shortname} instructions but not your compiler, can you try another compiler?])
            fi
          else
            if test x"${ac_instr_os_support}" = x"no"; then
              AC_CACHE_VAL(ax_cv_support_${ac_instr_acvar}_ext, eval ax_cv_support_${ac_instr_acvar}_ext=no)
              AC_MSG_WARN([Your processor supports ${ac_instr_shortname}, but your OS doesn't])
            fi
          fi
        else
          AC_CACHE_VAL(ax_cv_have_${ac_instr_acvar}_ext, eval ax_cv_have_${ac_instr_acvar}_ext=no)
          AC_CACHE_VAL(ax_cv_support_${ac_instr_acvar}_ext, eval ax_cv_support_${ac_instr_acvar}_ext=no)
        fi
      done
  ;;
  esac

  AH_TEMPLATE([HAVE_RDRND],[Define to 1 to support Digital Random Number Generator])
  AH_TEMPLATE([HAVE_BMI1],[Define to 1 to support Bit Manipulation Instruction Set 1])
  AH_TEMPLATE([HAVE_BMI2],[Define to 1 to support Bit Manipulation Instruction Set 2])
  AH_TEMPLATE([HAVE_ADX],[Define to 1 to support Multi-Precision Add-Carry Instruction Extensions])
  AH_TEMPLATE([HAVE_MPX],[Define to 1 to support Memory Protection Extensions])
  AH_TEMPLATE([HAVE_PREFETCHWT1],[Define to 1 to support Prefetch Vector Data Into Caches WT1])
  AH_TEMPLATE([HAVE_ABM],[Define to 1 to support Advanced Bit Manipulation])
  AH_TEMPLATE([HAVE_MMX],[Define to 1 to support Multimedia Extensions])
  AH_TEMPLATE([HAVE_SSE],[Define to 1 to support Streaming SIMD Extensions])
  AH_TEMPLATE([HAVE_SSE2],[Define to 1 to support Streaming SIMD Extensions 2])
  AH_TEMPLATE([HAVE_SSE3],[Define to 1 to support Streaming SIMD Extensions 3])
  AH_TEMPLATE([HAVE_SSSE3],[Define to 1 to support Supplemental Streaming SIMD Extensions 3])
  AH_TEMPLATE([HAVE_SSE4_1],[Define to 1 to support Streaming SIMD Extensions 4.1])
  AH_TEMPLATE([HAVE_SSE4_2],[Define to 1 to support Streaming SIMD Extensions 4.2])
  AH_TEMPLATE([HAVE_SSE4a],[Define to 1 to support AMD Streaming SIMD Extensions 4a])
  AH_TEMPLATE([HAVE_SHA],[Define to 1 to support Secure Hash Algorithm Extension])
  AH_TEMPLATE([HAVE_AES],[Define to 1 to support Advanced Encryption Standard New Instruction Set (AES-NI)])
  AH_TEMPLATE([HAVE_AVX],[Define to 1 to support Advanced Vector Extensions])
  AH_TEMPLATE([HAVE_FMA3],[Define to 1 to support Fused Multiply-Add Extensions 3])
  AH_TEMPLATE([HAVE_FMA4],[Define to 1 to support Fused Multiply-Add Extensions 4])
  AH_TEMPLATE([HAVE_XOP],[Define to 1 to support eXtended Operations Extensions])
  AH_TEMPLATE([HAVE_AVX2],[Define to 1 to support Advanced Vector Extensions 2])
  AH_TEMPLATE([HAVE_AVX512_F],[Define to 1 to support AVX-512 Foundation Extensions])
  AH_TEMPLATE([HAVE_AVX512_CD],[Define to 1 to support AVX-512 Conflict Detection Instructions])
  AH_TEMPLATE([HAVE_AVX512_PF],[Define to 1 to support AVX-512 Prefetch Instructions])
  AH_TEMPLATE([HAVE_AVX512_ER],[Define to 1 to support AVX-512 Exponential & Reciprocal Instructions])
  AH_TEMPLATE([HAVE_AVX512_VL],[Define to 1 to support AVX-512 Vector Length Extensions])
  AH_TEMPLATE([HAVE_AVX512_BW],[Define to 1 to support AVX-512 Byte and Word Instructions])
  AH_TEMPLATE([HAVE_AVX512_DQ],[Define to 1 to support AVX-512 Doubleword and Quadword Instructions])
  AH_TEMPLATE([HAVE_AVX512_IFMA],[Define to 1 to support AVX-512 Integer Fused Multiply Add Instructions])
  AH_TEMPLATE([HAVE_AVX512_VBMI],[Define to 1 to support AVX-512 Vector Byte Manipulation Instructions])
  AC_SUBST(SIMD_FLAGS)
  AC_SUBST(CPUEXT_FLAGS)
])
m4ri-release-20240729/m4/m4_ax_gcc_x86_avx_xgetbv.m4000066400000000000000000000064221465170556500216500ustar00rootroot00000000000000
# ===========================================================================
#   https://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
# ===========================================================================
#
# SYNOPSIS
#
#   AX_GCC_X86_AVX_XGETBV
#
# DESCRIPTION
#
#   On later x86 processors with AVX SIMD support, with gcc or a compiler
#   that has a compatible syntax for inline assembly instructions, run a
#   small program that executes the xgetbv instruction with input OP. This
#   can be used to detect if the OS supports AVX instruction usage.
#
#   On output, the values of the eax and edx registers are stored as
#   hexadecimal strings as "eax:edx" in the cache variable
#   ax_cv_gcc_x86_avx_xgetbv_OP.
#
#   If the xgetbv instruction fails (because you are running a
#   cross-compiler, or because you are not using gcc, or because you are on
#   a processor that doesn't have this instruction),
#   ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".
#
#   This macro mainly exists to be used in AX_EXT.
#
# LICENSE
#
#   Copyright (c) 2013 Michael Petch
#
#   This program is free software: you can redistribute it and/or modify it
#   under the terms of the GNU General Public License as published by the
#   Free Software Foundation, either version 3 of the License, or (at your
#   option) any later version.
#
#   This program is distributed in the hope that it will be useful, but
#   WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#   See the GNU General
#   Public License for more details.
#
#   You should have received a copy of the GNU General Public License along
#   with this program. If not, see <https://www.gnu.org/licenses/>.
#
#   As a special exception, the respective Autoconf Macro's copyright owner
#   gives unlimited permission to copy, distribute and modify the configure
#   scripts that are the output of Autoconf when processing the Macro. You
#   need not follow the terms of the GNU General Public License when using
#   or distributing such scripts, even though portions of the text of the
#   Macro appear in them. The GNU General Public License (GPL) does govern
#   all other use of the material that constitutes the Autoconf Macro.
#
#   This special exception to the GPL applies to versions of the Autoconf
#   Macro released by the Autoconf Archive. When you make and distribute a
#   modified version of the Autoconf Macro, you may extend this special
#   exception to the GPL to apply to your modified version as well.

#serial 3

dnl AX_GCC_X86_AVX_XGETBV(OP)
dnl Compiles and runs a probe that executes xgetbv with OP in ecx and writes
dnl "eax:edx" in hex to the scratch file conftest_xgetbv. The text is stored in
dnl the cache variable ax_cv_gcc_x86_avx_xgetbv_OP; the value is "unknown" when
dnl the probe cannot be built or run, or when cross-compiling.
AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,
 [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
     int op = $1, eax, edx;
     FILE *f;
      /* Opcodes for xgetbv */
      __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0"
        : "=a" (eax), "=d" (edx)
        : "c" (op));
     f = fopen("conftest_xgetbv", "w"); if (!f) return 1;
     fprintf(f, "%x:%x\n", eax, edx);
     fclose(f);
     return 0;
])],
     [ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],
     [ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],
     [ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
AC_LANG_POP([C])
])
m4ri-release-20240729/m4/m4_ax_gcc_x86_cpuid.m4000066400000000000000000000071671465170556500206060ustar00rootroot00000000000000
# ===========================================================================
#     https://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html
# ===========================================================================
#
# SYNOPSIS
#
#
AX_GCC_X86_CPUID(OP) # AX_GCC_X86_CPUID_COUNT(OP, COUNT) # # DESCRIPTION # # On Pentium and later x86 processors, with gcc or a compiler that has a # compatible syntax for inline assembly instructions, run a small program # that executes the cpuid instruction with input OP. This can be used to # detect the CPU type. AX_GCC_X86_CPUID_COUNT takes an additional COUNT # parameter that gets passed into register ECX before calling cpuid. # # On output, the values of the eax, ebx, ecx, and edx registers are stored # as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable # ax_cv_gcc_x86_cpuid_OP. # # If the cpuid instruction fails (because you are running a # cross-compiler, or because you are not using gcc, or because you are on # a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP # is set to the string "unknown". # # This macro mainly exists to be used in AX_GCC_ARCHFLAG. # # LICENSE # # Copyright (c) 2008 Steven G. Johnson # Copyright (c) 2008 Matteo Frigo # Copyright (c) 2015 Michael Petch # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation, either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. If not, see . # # As a special exception, the respective Autoconf Macro's copyright owner # gives unlimited permission to copy, distribute and modify the configure # scripts that are the output of Autoconf when processing the Macro. 
#   You
#   need not follow the terms of the GNU General Public License when using
#   or distributing such scripts, even though portions of the text of the
#   Macro appear in them. The GNU General Public License (GPL) does govern
#   all other use of the material that constitutes the Autoconf Macro.
#
#   This special exception to the GPL applies to versions of the Autoconf
#   Macro released by the Autoconf Archive. When you make and distribute a
#   modified version of the Autoconf Macro, you may extend this special
#   exception to the GPL to apply to your modified version as well.

#serial 10

dnl AX_GCC_X86_CPUID(OP)
dnl Shorthand for AX_GCC_X86_CPUID_COUNT with a COUNT of 0.
AC_DEFUN([AX_GCC_X86_CPUID],
[AX_GCC_X86_CPUID_COUNT($1, 0)
])

dnl AX_GCC_X86_CPUID_COUNT(OP, COUNT)
dnl Compiles and runs a probe that executes cpuid with OP in eax and COUNT in
dnl ecx, then writes "eax:ebx:ecx:edx" in hex to the scratch file
dnl conftest_cpuid. The text is stored in the cache variable
dnl ax_cv_gcc_x86_cpuid_OP (COUNT is not part of the variable name); the value
dnl is "unknown" when the probe cannot be built or run, or when cross-compiling.
dnl ebx is exchanged with a scratch register around the instruction rather
dnl than being listed as an output of the asm statement.
AC_DEFUN([AX_GCC_X86_CPUID_COUNT],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
 [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
     int op = $1, level = $2, eax, ebx, ecx, edx;
     FILE *f;
      __asm__ __volatile__ ("xchg %%ebx, %1\n"
        "cpuid\n"
        "xchg %%ebx, %1\n"
        : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
        : "a" (op), "2" (level));

     f = fopen("conftest_cpuid", "w"); if (!f) return 1;
     fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
     fclose(f);
     return 0;
])],
     [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
     [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
     [ax_cv_gcc_x86_cpuid_$1=unknown])])
AC_LANG_POP([C])
])
m4ri-release-20240729/m4/m4_ax_openmp.m4000066400000000000000000000112141465170556500174430ustar00rootroot00000000000000
# ===========================================================================
#        https://www.gnu.org/software/autoconf-archive/ax_openmp.html
# ===========================================================================
#
# SYNOPSIS
#
#   AX_OPENMP([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
#
# DESCRIPTION
#
#   This macro tries to find out how to compile programs that use OpenMP, a
#   standard API and set of compiler directives for parallel programming
#   (see http://www-unix.mcs/)
#
#   On success, it sets the
OPENMP_CFLAGS/OPENMP_CXXFLAGS/OPENMP_FFLAGS # output variable to the flag (e.g. -omp) used both to compile *and* link # OpenMP programs in the current language. # # NOTE: You are assumed to not only compile your program with these flags, # but also link it with them as well. # # If you want to compile everything with OpenMP, you should set: # # CFLAGS="$CFLAGS $OPENMP_CFLAGS" # #OR# CXXFLAGS="$CXXFLAGS $OPENMP_CXXFLAGS" # #OR# FFLAGS="$FFLAGS $OPENMP_FFLAGS" # # (depending on the selected language). # # The user can override the default choice by setting the corresponding # environment variable (e.g. OPENMP_CFLAGS). # # ACTION-IF-FOUND is a list of shell commands to run if an OpenMP flag is # found, and ACTION-IF-NOT-FOUND is a list of commands to run if it is # not found. If ACTION-IF-FOUND is not specified, the default action will # define HAVE_OPENMP. # # LICENSE # # Copyright (c) 2008 Steven G. Johnson # Copyright (c) 2015 John W. Peterson # Copyright (c) 2016 Nick R. Papior # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation, either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. If not, see <https://www.gnu.org/licenses/>. # # As a special exception, the respective Autoconf Macro's copyright owner # gives unlimited permission to copy, distribute and modify the configure # scripts that are the output of Autoconf when processing the Macro. You # need not follow the terms of the GNU General Public License when using # or distributing such scripts, even though portions of the text of the # Macro appear in them.
#   The GNU General Public License (GPL) does govern
#   all other use of the material that constitutes the Autoconf Macro.
#
#   This special exception to the GPL applies to versions of the Autoconf
#   Macro released by the Autoconf Archive. When you make and distribute a
#   modified version of the Autoconf Macro, you may extend this special
#   exception to the GPL to apply to your modified version as well.

#serial 13

dnl AX_OPENMP(ACTION-IF-FOUND, ACTION-IF-NOT-FOUND)
dnl Tries each candidate flag in turn by linking a small OpenMP program and
dnl caches the first one that works in ax_cv_LANG_openmp ("none" when no flag
dnl is needed, "unknown" when nothing links). A user-supplied OPENMP_xFLAGS
dnl value is tried first. The language flags are restored after the search.
AC_DEFUN([AX_OPENMP], [
AC_PREREQ([2.69]) dnl for _AC_LANG_PREFIX

AC_CACHE_CHECK([for OpenMP flag of _AC_LANG compiler], ax_cv_[]_AC_LANG_ABBREV[]_openmp, [save[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
ax_cv_[]_AC_LANG_ABBREV[]_openmp=unknown
# Flags to try:  -fopenmp (gcc), -mp (SGI & PGI),
#                -qopenmp (icc>=15), -openmp (icc),
#                -xopenmp (Sun), -omp (Tru64),
#                -qsmp=omp (AIX),
#                none
ax_openmp_flags="-fopenmp -openmp -qopenmp -mp -xopenmp -omp -qsmp=omp none"
if test "x$OPENMP_[]_AC_LANG_PREFIX[]FLAGS" != x; then
  ax_openmp_flags="$OPENMP_[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flag"s
fi
for ax_openmp_flag in $ax_openmp_flags; do
  case $ax_openmp_flag in
    # The no-flag probe must run with the saved flags unchanged.
    none) []_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS ;;
    *) []_AC_LANG_PREFIX[]FLAGS="$save[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flag" ;;
  esac
  AC_LINK_IFELSE([AC_LANG_SOURCE([[
@%:@include <omp.h>

static void
parallel_fill(int * data, int n)
{
  int i;
@%:@pragma omp parallel for
  for (i = 0; i < n; ++i)
    data[i] = i;
}

int
main()
{
  int arr[100000];
  omp_set_num_threads(2);
  parallel_fill(arr, 100000);
  return 0;
}
]])],[ax_cv_[]_AC_LANG_ABBREV[]_openmp=$ax_openmp_flag; break],[])
done
[]_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS
])
if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" = "xunknown"; then
  m4_default([$2],:)
else
  if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" != "xnone"; then
    OPENMP_[]_AC_LANG_PREFIX[]FLAGS=$ax_cv_[]_AC_LANG_ABBREV[]_openmp
  fi
  m4_default([$1], [AC_DEFINE(HAVE_OPENMP,1,[Define if OpenMP is enabled])])
fi
])dnl AX_OPENMP
m4ri-release-20240729/m4/pkg.m4000066400000000000000000000130231465170556500156360ustar00rootroot00000000000000# pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- # serial 1 (pkg-config-0.24) # # Copyright © 2004 Scott James Remnant . # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. 
# PKG_PROG_PKG_CONFIG([MIN-VERSION])
# ----------------------------------
# Locate pkg-config (unless PKG_CONFIG was set in the environment) and clear
# PKG_CONFIG when the tool found is older than MIN-VERSION (default 0.9.0).
AC_DEFUN([PKG_PROG_PKG_CONFIG],
[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$])
m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$])
AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])
AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path])
AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path])

if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
	AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
fi
if test -n "$PKG_CONFIG"; then
	_pkg_min_version=m4_default([$1], [0.9.0])
	AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version])
	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
		AC_MSG_RESULT([yes])
	else
		AC_MSG_RESULT([no])
		PKG_CONFIG=""
	fi
fi[]dnl
])# PKG_PROG_PKG_CONFIG

# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
#
# Check to see whether a particular set of modules exists. Similar
# to PKG_CHECK_MODULES(), but does not set variables or print errors.
#
# Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG])
# only at the first occurrence in configure.ac, so if the first place
# it's called might be skipped (such as if it is within an "if"), you
# have to call PKG_CHECK_EXISTS manually
# --------------------------------------------------------------
AC_DEFUN([PKG_CHECK_EXISTS],
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
if test -n "$PKG_CONFIG" && \
    AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
  m4_default([$2], [:])
m4_ifvaln([$3], [else
  $3])dnl
fi])

# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
# ---------------------------------------------
# Set pkg_cv_VARIABLE from the environment value of VARIABLE when that is
# non-empty, otherwise from "pkg-config --COMMAND MODULES". Sets pkg_failed
# to "yes" on a failed query and to "untried" when PKG_CONFIG is empty.
m4_define([_PKG_CONFIG],
[if test -n "$$1"; then
    pkg_cv_[]$1="$$1"
 elif test -n "$PKG_CONFIG"; then
    PKG_CHECK_EXISTS([$3],
                     [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`
		      test "x$?" != "x0" && pkg_failed=yes ],
		     [pkg_failed=yes])
 else
    pkg_failed=untried
fi[]dnl
])# _PKG_CONFIG

# _PKG_SHORT_ERRORS_SUPPORTED
# -----------------------------
# Set _pkg_short_errors_supported to yes when pkg-config is at least 0.20.
AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
        _pkg_short_errors_supported=yes
else
        _pkg_short_errors_supported=no
fi[]dnl
])# _PKG_SHORT_ERRORS_SUPPORTED


# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
# [ACTION-IF-NOT-FOUND])
#
#
# Note that if there is a possibility the first call to
# PKG_CHECK_MODULES might not happen, you should be sure to include an
# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
#
#
# --------------------------------------------------------------
AC_DEFUN([PKG_CHECK_MODULES],
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl

pkg_failed=no
AC_MSG_CHECKING([for $1])

_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
_PKG_CONFIG([$1][_LIBS], [libs], [$2])

m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
and $1[]_LIBS to avoid the need to call pkg-config.
See the pkg-config man page for more details.])

if test $pkg_failed = yes; then
	AC_MSG_RESULT([no])
	_PKG_SHORT_ERRORS_SUPPORTED
	if test $_pkg_short_errors_supported = yes; then
		$1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
	else
		$1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
	fi
	# Put the nasty error message in config.log where it belongs
	echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD

	m4_default([$4], [AC_MSG_ERROR(
[Package requirements ($2) were not met:

$$1_PKG_ERRORS

Consider adjusting the PKG_CONFIG_PATH environment variable if you
installed software in a non-standard prefix.

_PKG_TEXT])[]dnl
	])
elif test $pkg_failed = untried; then
	AC_MSG_RESULT([no])
	m4_default([$4], [AC_MSG_FAILURE(
[The pkg-config script could not be found or is too old. Make sure it
is in your PATH or set the PKG_CONFIG environment variable to the full
path to pkg-config.

_PKG_TEXT

To get pkg-config, see <http://pkg-config.freedesktop.org/>.])[]dnl
	])
else
	$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
	$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
	AC_MSG_RESULT([yes])
	$3
fi[]dnl
])# PKG_CHECK_MODULES
m4ri-release-20240729/m4ri.pc.in000066400000000000000000000005061465170556500161010ustar00rootroot00000000000000
prefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
includedir=@includedir@

Name: M4RI
Description: Dense linear algebra over GF(2).
Version: @PACKAGE_VERSION@
Requires: @M4RI_USE_PNG_PC@
Libs: -L${libdir} -lm4ri @RAW_LIBPNG@ @LIBM@ @LIBPNG_LIBADD@
Cflags: -I${includedir} @SIMD_CFLAGS@ @OPENMP_CFLAGS@ @LIBPNG_CFLAGS@
m4ri-release-20240729/m4ri.vcxproj000066400000000000000000000137601465170556500165730ustar00rootroot00000000000000
 Debug Win32 Release Win32 {B7057015-3433-44CD-8D13-F5AA62A1AC9E} Win32Proj StaticLibrary StaticLibrary <_ProjectFileVersion>10.0.30319.1 Debug\ Debug\ true Release\ Release\ false .lib .lib Disabled WIN32;_DEBUG;_WINDOWS;_USRDLL;M4RI_EXPORTS;%(PreprocessorDefinitions) true EnableFastChecks MultiThreadedDebugDLL Level3 EditAndContinue CompileAsCpp true Windows MachineX86 WIN32;NDEBUG;_WINDOWS;_USRDLL;M4RI_EXPORTS;%(PreprocessorDefinitions) MultiThreadedDLL Level3 ProgramDatabase CompileAsCpp true Windows true true MachineX86
m4ri-release-20240729/m4ri/000077500000000000000000000000001465170556500151475ustar00rootroot00000000000000
m4ri-release-20240729/m4ri/Doxyfile000066400000000000000000003050341465170556500166620ustar00rootroot00000000000000
# Doxyfile 1.8.7

# This file describes the settings to be used by the documentation system
# doxygen (www.doxygen.org) for a project.
#
# All text after a double hash (##) is considered a comment and is placed in
# front of the TAG it is preceding.
# # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all text # before the first occurrence of this tag. Doxygen uses libiconv (or the iconv # built into libc) for the transcoding. See http://www.gnu.org/software/libiconv # for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = M4RI # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = 20240729 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify an logo or icon that is included in # the documentation. The maximum height of the logo should not exceed 55 pixels # and the maximum width should not exceed 200 pixels. Doxygen will copy the logo # to the output directory. 
PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = ../doc/ # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise cause # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English.
OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path # before files name in the file list and in the header files. 
If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful if your file system doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description.
If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a # new page for each member. If set to NO, the documentation of a member will be # part of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 8 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. ALIASES = "" # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". 
For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = YES # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, Javascript, # C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: # FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: # Fortran. In the later case the parser tries to guess whether the code is fixed # or free formatted code, this is the default for Fortran type files), VHDL. 
For # instance to make doxygen treat .inc files as Fortran files (default is PHP), # and .f files as C (default is Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibility issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word # or globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also makes the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # http://www.riverbankcomputing.co.uk/software/sip/intro) sources only.
Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO.
INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. 
LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PACKAGE tag is set to YES all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined # locally in source files will be included in the documentation. If set to NO # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO only methods in the interface are # included. # The default value is: NO. 
EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO these classes will be included in the various overviews. This option has # no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # (class|struct|union) declarations. If set to NO these declarations will be # included in the documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. 
If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. 
SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the # todo list. This list is created by putting \todo commands in the # documentation. # The default value is: YES. 
GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the # test list. This list is created by putting \test commands in the # documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES the list # will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. 
SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. Do not use file names with spaces, bibtex cannot handle them. 
See # also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO doxygen will only warn about wrong or incomplete parameter # documentation, but not about the absence of documentation. # The default value is: NO. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. 
Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. # Note: If this tag is empty the current directory is searched. INPUT = # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: http://www.gnu.org/software/libiconv) for the list of # possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank the # following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, # *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, # *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, # *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, # *.qsf, *.as and *.js. 
FILE_PATTERNS = # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = ../tests # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. 
If left blank all # files are included. EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER ) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO.
FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # function all documented functions referencing it will be listed. # The default value is: NO. 
REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES, then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see http://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the config file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. 
USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = NO # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML the header file that includes any scripts and style sheets # that doxygen needs, which is dependent on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. 
If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = doxygen.css # The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user- # defined cascading style sheet that is included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet file to the output directory. For an example # see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the stylesheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # http://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. 
# Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8. The value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to NO can help when comparing the output of multiple runs. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. 
Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries 1 will produce a full collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a full expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: http://developer.apple.com/tools/xcode/), introduced with # OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed. A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. 
DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on # Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler ( hhc.exe). 
If non-empty # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated ( # YES) or that it should be included in the master .chm file ( NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index ( hhk), content ( hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated ( # YES) or a normal table of contents ( NO) in the .chm file. Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. 
For more information please see Qt Help Project / Namespace # (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual # Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- # folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. 
QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files needs # to be copied into the plugins directory of eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. # After copying Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help). 
For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. Since the tree basically has # the same information as the tab index, you could consider setting # DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = NO # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. TREEVIEW_WIDTH = 250 # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. 
# Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes have effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # http://www.mathjax.org) which uses client side Javascript for the rendering # instead of using prerendered bitmaps. Use this if you do not have LaTeX # installed or if you want formulas to look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. USE_MATHJAX = YES # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: # http://docs.mathjax.org/en/latest/output.html) for more details. # Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. 
The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of # MathJax from http://www.mathjax.org before deployment. # The default value is: http://cdn.mathjax.org/mathjax/latest. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/ # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site # (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and # should work on any modern browser. Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow, then # enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to # search using the keyboard; to jump to the search box use + S # (what the is depends on the OS and browser, but it is typically # , /