pax_global_header00006660000000000000000000000064121230236430014506gustar00rootroot0000000000000052 comment=ed4743b417502adfb135b4eae58123e16e802453 libm4rie-20130416/000077500000000000000000000000001212302364300134575ustar00rootroot00000000000000libm4rie-20130416/.hgignore000066400000000000000000000005521212302364300152640ustar00rootroot00000000000000# use glob syntax. syntax: glob Makefile.in aclocal.m4 config.guess configure config.sub depcomp install-sh ltmain.sh m4/libtool.m4 m4/ltoptions.m4 m4/ltsugar.m4 m4/ltversion.m4 m4/lt~obsolete.m4 missing src/config.h.in~ src/cscope.out src/cscope.files .deps autom4te.cache Makefile bench/Makefile config.log config.status libtool src/config.h src/stamp-h1 doc libm4rie-20130416/AUTHORS000066400000000000000000000000001212302364300145150ustar00rootroot00000000000000libm4rie-20130416/COPYING000066400000000000000000000431301212302364300145130ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. 
This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. 
b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) 19yy <name of author> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. 
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) 19yy name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. <signature of Ty Coon>, 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. libm4rie-20130416/ChangeLog000066400000000000000000000000001212302364300152170ustar00rootroot00000000000000libm4rie-20130416/INSTALL000066400000000000000000000363321212302364300145170ustar00rootroot00000000000000Installation Instructions ************************* Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without warranty of any kind. 
Basic Installation ================== Briefly, the shell commands `./configure; make; make install' should configure, build, and install this package. The following more-detailed instructions are generic; see the `README' file for instructions specific to this package. Some packages provide this `INSTALL' file but do not implement all of the features documented below. The lack of an optional feature in a given package is not necessarily a bug. More recommendations for GNU packages can be found in *note Makefile Conventions: (standards)Makefile Conventions. The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses those values to create a `Makefile' in each directory of the package. It may also create one or more `.h' files containing system-dependent definitions. Finally, it creates a shell script `config.status' that you can run in the future to recreate the current configuration, and a file `config.log' containing compiler output (useful mainly for debugging `configure'). It can also use an optional file (typically called `config.cache' and enabled with `--cache-file=config.cache' or simply `-C') that saves the results of its tests to speed up reconfiguring. Caching is disabled by default to prevent problems with accidental use of stale cache files. If you need to do unusual things to compile the package, please try to figure out how `configure' could check whether to do them, and mail diffs or instructions to the address given in the `README' so they can be considered for the next release. If you are using the cache, and at some point `config.cache' contains results you don't want to keep, you may remove or edit it. The file `configure.ac' (or `configure.in') is used to create `configure' by a program called `autoconf'. You need `configure.ac' if you want to change it or regenerate `configure' using a newer version of `autoconf'. The simplest way to compile this package is: 1. 
`cd' to the directory containing the package's source code and type `./configure' to configure the package for your system. Running `configure' might take a while. While running, it prints some messages telling which features it is checking for. 2. Type `make' to compile the package. 3. Optionally, type `make check' to run any self-tests that come with the package, generally using the just-built uninstalled binaries. 4. Type `make install' to install the programs and any data files and documentation. When installing into a prefix owned by root, it is recommended that the package be configured and built as a regular user, and only the `make install' phase executed with root privileges. 5. Optionally, type `make installcheck' to repeat any self-tests, but this time using the binaries in their final installed location. This target does not install anything. Running this target as a regular user, particularly if the prior `make install' required root privileges, verifies that the installation completed correctly. 6. You can remove the program binaries and object files from the source code directory by typing `make clean'. To also remove the files that `configure' created (so you can compile the package for a different kind of computer), type `make distclean'. There is also a `make maintainer-clean' target, but that is intended mainly for the package's developers. If you use it, you may have to get all sorts of other programs in order to regenerate files that came with the distribution. 7. Often, you can also type `make uninstall' to remove the installed files again. In practice, not all packages have tested that uninstallation works correctly, even though it is required by the GNU Coding Standards. 8. Some packages, particularly those that use Automake, provide `make distcheck', which can be used by developers to test that all other targets like `make install' and `make uninstall' work correctly. This target is generally not run by end users. 
Compilers and Options ===================== Some systems require unusual options for compilation or linking that the `configure' script does not know about. Run `./configure --help' for details on some of the pertinent environment variables. You can give `configure' initial values for configuration parameters by setting variables in the command line or in the environment. Here is an example: ./configure CC=c99 CFLAGS=-g LIBS=-lposix *Note Defining Variables::, for more details. Compiling For Multiple Architectures ==================================== You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. To do this, you can use GNU `make'. `cd' to the directory where you want the object files and executables to go and run the `configure' script. `configure' automatically checks for the source code in the directory that `configure' is in and in `..'. This is known as a "VPATH" build. With a non-GNU `make', it is safer to compile the package for one architecture at a time in the source code directory. After you have installed the package for one architecture, use `make distclean' before reconfiguring for another architecture. On MacOS X 10.5 and later systems, you can create libraries and executables that work on multiple system types--known as "fat" or "universal" binaries--by specifying multiple `-arch' options to the compiler but only a single `-arch' option to the preprocessor. Like this: ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ CPP="gcc -E" CXXCPP="g++ -E" This is not guaranteed to produce working output in all cases, you may have to build one architecture at a time and combine the results using the `lipo' tool if you have problems. 
Installation Names ================== By default, `make install' installs the package's commands under `/usr/local/bin', include files under `/usr/local/include', etc. You can specify an installation prefix other than `/usr/local' by giving `configure' the option `--prefix=PREFIX', where PREFIX must be an absolute file name. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. If you pass the option `--exec-prefix=PREFIX' to `configure', the package uses PREFIX as the prefix for installing programs and libraries. Documentation and other data files still use the regular prefix. In addition, if you use an unusual directory layout you can give options like `--bindir=DIR' to specify different values for particular kinds of files. Run `configure --help' for a list of the directories you can set and what kinds of files go in them. In general, the default for these options is expressed in terms of `${prefix}', so that specifying just `--prefix' will affect all of the other directory specifications that were not explicitly provided. The most portable way to affect installation locations is to pass the correct locations to `configure'; however, many packages provide one or both of the following shortcuts of passing variable assignments to the `make install' command line to change installation locations without having to reconfigure or recompile. The first method involves providing an override variable for each affected directory. For example, `make install prefix=/alternate/directory' will choose an alternate location for all directory configuration variables that were expressed in terms of `${prefix}'. Any directories that were specified during `configure', but not in terms of `${prefix}', must each be overridden at install time for the entire installation to be relocated. 
The approach of makefile variable overrides for each directory variable is required by the GNU Coding Standards, and ideally causes no recompilation. However, some platforms have known limitations with the semantics of shared libraries that end up requiring recompilation when using this method, particularly noticeable in packages that use GNU Libtool. The second method involves providing the `DESTDIR' variable. For example, `make install DESTDIR=/alternate/directory' will prepend `/alternate/directory' before all installation names. The approach of `DESTDIR' overrides is not required by the GNU Coding Standards, and does not work on platforms that have drive letters. On the other hand, it does better at avoiding recompilation issues, and works well even when some directory options were not specified in terms of `${prefix}' at `configure' time. Optional Features ================= If the package supports it, you can cause programs to be installed with an extra prefix or suffix on their names by giving `configure' the option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The `README' should mention any `--enable-' and `--with-' options that the package recognizes. For packages that use the X Window System, `configure' can usually find the X include and library files automatically, but if it doesn't, you can use the `configure' options `--x-includes=DIR' and `--x-libraries=DIR' to specify their locations. Some packages offer the ability to configure how verbose the execution of `make' will be. 
For these packages, running `./configure --enable-silent-rules' sets the default to minimal output, which can be overridden with `make V=1'; while running `./configure --disable-silent-rules' sets the default to verbose, which can be overridden with `make V=0'. Particular systems ================== On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC is not installed, it is recommended to use the following options in order to use an ANSI C compiler: ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" and if that doesn't work, install pre-built binaries of GCC for HP-UX. On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot parse its `<wchar.h>' header file. The option `-nodtk' can be used as a workaround. If GNU CC is not installed, it is therefore recommended to try ./configure CC="cc" and if that doesn't work, try ./configure CC="cc -nodtk" On Solaris, don't put `/usr/ucb' early in your `PATH'. This directory contains several dysfunctional programs; working variants of these programs are available in `/usr/bin'. So, if you need `/usr/ucb' in your `PATH', put it _after_ `/usr/bin'. On Haiku, software installed for all users goes in `/boot/common', not `/usr/local'. It is recommended to use the following options: ./configure --prefix=/boot/common Specifying the System Type ========================== There may be some features `configure' cannot figure out automatically, but needs to determine by the type of machine the package will run on. Usually, assuming the package is built to be run on the _same_ architectures, `configure' can figure that out, but if it prints a message saying it cannot guess the machine type, give it the `--build=TYPE' option. TYPE can either be a short name for the system type, such as `sun4', or a canonical name which has the form: CPU-COMPANY-SYSTEM where SYSTEM can have one of these forms: OS KERNEL-OS See the file `config.sub' for the possible values of each field. 
If `config.sub' isn't included in this package, then this package doesn't need to know the machine type. If you are _building_ compiler tools for cross-compiling, you should use the option `--target=TYPE' to select the type of system they will produce code for. If you want to _use_ a cross compiler, that generates code for a platform different from the build platform, you should specify the "host" platform (i.e., that on which the generated programs will eventually be run) with `--host=TYPE'. Sharing Defaults ================ If you want to set default values for `configure' scripts to share, you can create a site shell script called `config.site' that gives default values for variables like `CC', `cache_file', and `prefix'. `configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. A warning: not all `configure' scripts look for a site script. Defining Variables ================== Variables not defined in a site shell script can be set in the environment passed to `configure'. However, some packages may run configure again during the build, and the customized values of these variables may be lost. In order to avoid this problem, you should set them in the `configure' command line, using `VAR=value'. For example: ./configure CC=/usr/local2/bin/gcc causes the specified `gcc' to be used as the C compiler (unless it is overridden in the site shell script). Unfortunately, this technique does not work for `CONFIG_SHELL' due to an Autoconf bug. Until the bug is fixed you can use this workaround: CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash `configure' Invocation ====================== `configure' recognizes the following options to control how it operates. `--help' `-h' Print a summary of all of the options to `configure', and exit. 
`--help=short' `--help=recursive' Print a summary of the options unique to this package's `configure', and exit. The `short' variant lists options used only in the top level, while the `recursive' variant lists options also present in any nested packages. `--version' `-V' Print the version of Autoconf used to generate the `configure' script, and exit. `--cache-file=FILE' Enable the cache: use and save the results of the tests in FILE, traditionally `config.cache'. FILE defaults to `/dev/null' to disable caching. `--config-cache' `-C' Alias for `--cache-file=config.cache'. `--quiet' `--silent' `-q' Do not print messages saying which checks are being made. To suppress all normal output, redirect it to `/dev/null' (any error messages will still be shown). `--srcdir=DIR' Look for the package's source code in directory DIR. Usually `configure' can determine that directory automatically. `--prefix=DIR' Use DIR as the installation prefix. *note Installation Names:: for more details, including other options available for fine-tuning the installation locations. `--no-create' `-n' Run the configure checks, but stop before creating any output files. `configure' also accepts some other, not widely useful, options. Run `configure --help' for more details. 
libm4rie-20130416/Makefile.am000066400000000000000000000046441212302364300155230ustar00rootroot00000000000000AUTOMAKE_OPTIONS = gnu ACLOCAL_AMFLAGS = -I m4 AM_CFLAGS=${SIMD_FLAGS} ${OPENMP_CFLAGS} ${DEBUG_FLAGS} ${M4RIE_M4RI_CFLAGS} ${M4RI_CFLAGS} lib_LTLIBRARIES = libm4rie.la libm4rie_la_SOURCES = m4rie/gf2e.c \ m4rie/mzed.c \ m4rie/newton_john.c \ m4rie/echelonform.c \ m4rie/strassen.c \ m4rie/mzd_slice.c \ m4rie/mzd_poly.c \ m4rie/trsm.c \ m4rie/ple.c \ m4rie/conversion.c \ m4rie/conversion_slice8.c \ m4rie/conversion_slice16.c \ m4rie/conversion_cling8.c \ m4rie/conversion_cling16.c \ m4rie/mzd_slice_intro.inl \ m4rie/mzd_slice_outro.inl \ m4rie/mzed_intro.inl \ m4rie/mzed_outro.inl \ m4rie/trsm.inl pkgincludesubdir = $(includedir)/m4rie pkgincludesub_HEADERS = m4rie/gf2x.h \ m4rie/gf2e.h \ m4rie/mzed.h \ m4rie/m4rie.h \ m4rie/m4ri_functions.h \ m4rie/newton_john.h \ m4rie/echelonform.h \ m4rie/strassen.h \ m4rie/mzd_slice.h \ m4rie/mzd_poly.h \ m4rie/trsm.h \ m4rie/ple.h \ m4rie/permutation.h \ m4rie/conversion.h \ gf2e_cxx/finite_field_givaro.h libm4rie_la_LDFLAGS = -release 0.0.$(RELEASE) -no-undefined ${M4RIE_M4RI_LDFLAGS} libm4rie_la_LIBADD = -lm4ri #testing TESTCXXFLAGS = ${AM_CFLAGS} @CXXFLAGS@ ${M4RIE_M4RI_CFLAGS} ${M4RI_CFLAGS} -I./tests TESTLIBADD = -lm4ri -lm4rie TESTLDADD = TESTLDFLAGS = check_PROGRAMS = test_trsm test_elimination test_multiplication test_smallops test_ple test_elimination_SOURCES = tests/test_elimination.cc tests/testing.h test_elimination_LDADD = ${TESTLDADD} ${TESTLIBADD} test_elimination_LDFLAGS = ${TESTLDFLAGS} test_elimination_CXXFLAGS = ${TESTCXXFLAGS} test_multiplication_SOURCES = tests/test_multiplication.cc tests/testing.h test_multiplication_LDADD = ${TESTLDADD} ${TESTLIBADD} test_multiplication_LDFLAGS = ${TESTLDFLAGS} test_multiplication_CXXFLAGS = ${TESTCXXFLAGS} test_smallops_SOURCES = tests/test_smallops.cc tests/testing.h test_smallops_LDADD = ${TESTLDADD} ${TESTLIBADD} test_smallops_LDFLAGS = ${TESTLDFLAGS} 
test_smallops_CXXFLAGS = ${TESTCXXFLAGS} test_trsm_SOURCES = tests/test_trsm.cc tests/testing.h test_trsm_LDADD = ${TESTLDADD} ${TESTLIBADD} test_trsm_LDFLAGS = ${TESTLDFLAGS} test_trsm_CXXFLAGS = ${TESTCXXFLAGS} test_ple_SOURCES = tests/test_ple.cc tests/testing.h test_ple_LDADD = ${TESTLDADD} ${TESTLIBADD} test_ple_LDFLAGS = ${TESTLDFLAGS} test_ple_CXXFLAGS = ${TESTCXXFLAGS} TESTS = test_trsm test_elimination test_multiplication test_smallops test_ple # benchmarketing SUBDIRS = . bench clean-local: (cd tests; make clean; cd ..) libm4rie-20130416/NEWS000066400000000000000000000000001212302364300141440ustar00rootroot00000000000000libm4rie-20130416/README000066400000000000000000000000001212302364300143250ustar00rootroot00000000000000libm4rie-20130416/bench/000077500000000000000000000000001212302364300145365ustar00rootroot00000000000000libm4rie-20130416/bench/Makefile.am000066400000000000000000000026061212302364300165760ustar00rootroot00000000000000CPUCYCLES_DIR=./cpucycles-20060326 BENCHCXXFLAGS = $(AM_CFLAGS) -I.. -I${CPUCYCLES_DIR} @CXXFLAGS@ -DNDEBUG BENCHLIBADD = -lm4ri cpucycles.o BENCHLDFLAGS = -Wl,-rpath,../.libs/ ../.libs/libm4rie.so -L${CPUCYCLES_DIR} -Wl,-rpath,${M4RIE_M4RI_PREFIX}/lib EXTRA_PROGRAMS = bench_multiplication bench_elimination bench_smallops bench_trsm bench_ple .PHONY: clean dist-clean cpucycles.o: (cd $(CPUCYCLES_DIR); sh do; mv cpucycles.o ..; mv cpucycles.h ..; cd ../..) 
cpucycles.h: cpucycles.o clean-local: -rm -f cpucycles.h -rm -f $(EXTRA_PROGRAMS) bench_elimination_SOURCES=bench_elimination.cc cpucycles.h benchmarking.cc bench_elimination_CXXFLAGS=${BENCHCXXFLAGS} bench_elimination_LDFLAGS=${BENCHLDFLAGS} ${BENCHLIBADD} bench_multiplication_SOURCES=bench_multiplication.cc cpucycles.h benchmarking.cc bench_multiplication_CXXFLAGS=${BENCHCXXFLAGS} bench_multiplication_LDFLAGS=${BENCHLDFLAGS} ${BENCHLIBADD} bench_smallops_SOURCES=bench_smallops.cc cpucycles.h benchmarking.cc bench_smallops_CXXFLAGS=${BENCHCXXFLAGS} bench_smallops_LDFLAGS=${BENCHLDFLAGS} ${BENCHLIBADD} bench_trsm_SOURCES=bench_trsm.cc cpucycles.h benchmarking.cc bench_trsm_CXXFLAGS=${BENCHCXXFLAGS} bench_trsm_LDFLAGS=${BENCHLDFLAGS} ${BENCHLIBADD} bench_ple_SOURCES=bench_ple.cc cpucycles.h benchmarking.cc bench_ple_CXXFLAGS=${BENCHCXXFLAGS} bench_ple_LDFLAGS=${BENCHLDFLAGS} ${BENCHLIBADD} bench: cpucycles.o ${EXTRA_PROGRAMS}libm4rie-20130416/bench/bench_elimination.cc000066400000000000000000000072261212302364300205230ustar00rootroot00000000000000#include #include #include "benchmarking.h" struct elim_params { rci_t k; rci_t m; rci_t n; rci_t r; char const *algorithm; char type; }; int run_mzed(void *_p, unsigned long long *data, int *data_len) { struct elim_params *p = (struct elim_params *)_p; *data_len = 2; gf2e *ff = gf2e_init(irreducible_polynomials[p->k][1]); mzed_t *A = mzed_init(ff,p->m,p->n); mzed_randomize(A); data[0] = walltime(0); data[1] = cpucycles(); if(strcmp(p->algorithm,"newton-john")==0) p->r= mzed_echelonize_newton_john(A, 1); else if(strcmp(p->algorithm,"naive")==0) p->r = mzed_echelonize_naive(A, 1); else if(strcmp(p->algorithm,"ple")==0) p->r = mzed_echelonize_ple(A, 1); else if(strcmp(p->algorithm,"default")==0) p->r = mzed_echelonize(A, 1); else m4ri_die("uknown algorithm '%s'\n.",p->algorithm); data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); mzed_free(A); gf2e_free(ff); return 0; } int run_mzd_slice(void *_p, unsigned long 
long *data, int *data_len) { struct elim_params *p = (struct elim_params *)_p; *data_len = 2; gf2e *ff = gf2e_init(irreducible_polynomials[p->k][1]); mzd_slice_t *A = mzd_slice_init(ff,p->m,p->n); mzd_slice_randomize(A); data[0] = walltime(0); data[1] = cpucycles(); if(strcmp(p->algorithm,"newton-john")==0) { mzed_t *B = mzed_cling(NULL, A); p->r= mzed_echelonize_newton_john(B, 1); mzed_slice(A, B); mzed_free(B); } else if(strcmp(p->algorithm,"naive")==0) { mzed_t *B = mzed_cling(NULL, A); p->r = mzed_echelonize_naive(B, 1); mzed_slice(A, B); mzed_free(B); } else if(strcmp(p->algorithm,"ple")==0) { p->r = mzd_slice_echelonize_ple(A, 1); } else if(strcmp(p->algorithm,"default")==0) { p->r = mzd_slice_echelonize(A, 1); } else { m4ri_die("uknown algorithm '%s'\n.",p->algorithm); } data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); mzd_slice_free(A); gf2e_free(ff); return 0; } void print_help() { printf("bench_elimination:\n\n"); printf("REQUIRED\n"); printf(" e -- integer between 2 and 10\n"); printf(" m -- integer > 0, number of rows\n"); printf(" n -- integer > 0, number of columns\n"); printf(" algorithm -- default -- let M4RIE decide\n"); printf(" naive -- cubic Gaussian elimination\n"); printf(" newton-john -- Newton-John tables\n"); printf(" ple -- PLE based\n"); printf(" type -- mzed_t or mzd_slice_t (default: mzed_t)\n"); printf("\n"); bench_print_global_options(stdout); } int main(int argc, char **argv) { global_options(&argc, &argv); if (argc < 4) { print_help(); m4ri_die(""); } struct elim_params params; params.k = atoi(argv[1]); params.m = atoi(argv[2]); params.n = atoi(argv[3]); if (argc >= 5) params.algorithm = argv[4]; else params.algorithm = (char*)"default"; if (argc >= 6) { if (strcmp("mzed_t",argv[5]) == 0) params.type = 0; else if (strcmp("mzd_slice_t",argv[5]) == 0) params.type = 1; else m4ri_die("unknown type '%s'\n",argv[5]); } else { params.type = 0; } srandom(17); unsigned long long data[2]; if (params.type == 0) run_bench(run_mzed, 
(void*)¶ms, data, 2); else run_bench(run_mzd_slice, (void*)¶ms, data, 2); double cc_per_op = ((double)data[1])/ ((double)params.m * (double)params.n * powl((double)params.r,__M4RIE_OMEGA-2) ); printf("e: %2d, m: %5d, n: %5d, type: %d, algo: %10s, cpu cycles: %10llu, cc/(mnr^0.807): %.5lf, wall time: %lf\n", params.k, params.m, params.n, params.type, params.algorithm, data[1], cc_per_op, data[0] / 1000000.0); } libm4rie-20130416/bench/bench_multiplication.cc000066400000000000000000000073671212302364300212560ustar00rootroot00000000000000#include #include #include "benchmarking.h" struct mul_params { rci_t k; rci_t m; rci_t n; char const *algorithm; char type; }; int run_mzed(void *_p, unsigned long long *data, int *data_len) { struct mul_params *p = (struct mul_params *)_p; *data_len = 2; gf2e *ff = gf2e_init(irreducible_polynomials[p->k][1]); mzed_t *A = mzed_init(ff,p->m,p->n); mzed_randomize(A); mzed_t *B = mzed_init(ff,p->n,p->m); mzed_randomize(B); mzed_t *C; data[0] = walltime(0); data[1] = cpucycles(); if(strcmp(p->algorithm, "newton-john")==0) C = mzed_mul_newton_john(NULL, A, B); else if(strcmp(p->algorithm,"naive")==0) C = mzed_mul_naive(NULL, A, B); else if(strcmp(p->algorithm,"strassen")==0) C = mzed_mul_strassen(NULL, A, B,_mzed_strassen_cutoff(NULL, A, B)); else if(strcmp(p->algorithm,"karatsuba")==0) C = mzed_mul_karatsuba(NULL, A, B); else if(strcmp(p->algorithm,"default")==0) C = mzed_mul(NULL, A, B); else m4ri_die("uknown algorithm '%s'\n.",p->algorithm); data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); mzed_free(A); mzed_free(B); mzed_free(C); gf2e_free(ff); return 0; } int run_mzd_slice(void *_p, unsigned long long *data, int *data_len) { struct mul_params *p = (struct mul_params *)_p; *data_len = 2; gf2e *ff = gf2e_init(irreducible_polynomials[p->k][1]); mzd_slice_t *A = mzd_slice_init(ff,p->m,p->n); mzd_slice_randomize(A); mzd_slice_t *B = mzd_slice_init(ff,p->n,p->m); mzd_slice_randomize(B); mzd_slice_t *C; data[0] = walltime(0); 
data[1] = cpucycles(); if(strcmp(p->algorithm,"karatsuba")==0) { C = mzd_slice_mul_karatsuba(NULL, A, B); } else if(strcmp(p->algorithm,"default")==0) { C = mzd_slice_mul(NULL, A, B); } else { m4ri_die("uknown algorithm '%s'\n.",p->algorithm); } data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); mzd_slice_free(A); mzd_slice_free(B); mzd_slice_free(C); gf2e_free(ff); return 0; } void print_help() { printf("bench_elimination:\n\n"); printf("REQUIRED\n"); printf(" e -- integer between 2 and 10\n"); printf(" m -- integer > 0, number of rows\n"); printf(" n -- integer > 0, number of columns\n"); printf(" algorithm -- default -- let M4RIE decide (mzed_t, mzd_slice_t)\n"); printf(" naive -- cubic multiplication (mzed_t)\n"); printf(" newton-john -- Newton-John tables (mzed_t) \n"); printf(" strassen -- Strassen+Newton-John (mzed_t)\n"); printf(" karatsuba -- Karatsuba (mzed_t)\n"); printf(" type -- mzed_t or mzd_slice_t (default: mzed_t)\n"); printf("\n"); bench_print_global_options(stdout); } int main(int argc, char **argv) { global_options(&argc, &argv); if (argc < 4) { print_help(); m4ri_die(""); } struct mul_params params; params.k = atoi(argv[1]); params.m = atoi(argv[2]); params.n = atoi(argv[3]); if (argc >= 5) params.algorithm = argv[4]; else params.algorithm = (char*)"default"; if (argc >= 6) { if (strcmp("mzed_t",argv[5]) == 0) params.type = 0; else if (strcmp("mzd_slice_t",argv[5]) == 0) params.type = 1; else m4ri_die("unknown type '%s'\n",argv[5]); } else { params.type = 0; } srandom(17); unsigned long long data[2]; if (params.type == 0) run_bench(run_mzed, (void*)¶ms, data, 2); else run_bench(run_mzd_slice, (void*)¶ms, data, 2); double cc_per_op = ((double)data[1])/ ( (double)params.m * powl((double)params.n,1.807) ); printf("e: %2d, m: %5d, n: %5d, type: %d, algo: %10s, cpu cycles: %10llu, cc/(mn^1.807): %.5lf, wall time: %lf\n", params.k, params.m, params.n, params.type, params.algorithm, data[1], cc_per_op, data[0] / 1000000.0); } 
libm4rie-20130416/bench/bench_ple.cc000066400000000000000000000043621212302364300167710ustar00rootroot00000000000000#include #include #include "benchmarking.h" struct ple_params { rci_t k; rci_t m; rci_t n; rci_t c; rci_t r; char const *algorithm; }; int run(void *_p, unsigned long long *data, int *data_len) { struct ple_params *p = (struct ple_params *)_p; *data_len = 2; gf2e *ff = gf2e_init(irreducible_polynomials[p->k][1]); mzed_t *A = mzed_init(ff, p->m, p->n); mzed_randomize(A); mzp_t *P = mzp_init(p->m); mzp_t *Q = mzp_init(p->n); data[1] = cpucycles(); data[0] = walltime(0.0); if(strcmp(p->algorithm,"default")==0) p->r = _mzed_ple(A, P, Q, p->c); else if(strcmp(p->algorithm,"newton-john")==0) p->r = mzed_ple_newton_john(A, P, Q); else if(strcmp(p->algorithm,"naive")==0) p->r = mzed_ple_naive(A, P, Q); else p->r = mzed_echelonize(A, 1); data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); mzed_free(A); mzp_free(P); mzp_free(Q); gf2e_free(ff); return 0; } void print_help() { printf("bench_ple:\n\n"); printf("REQUIRED\n"); printf(" e -- integer between 2 and 10\n"); printf(" m -- integer > 0\n"); printf(" n -- integer > 0\n"); printf(" algorithm -- default\n"); printf(" newton-john\n"); printf(" naive\n"); printf(" c -- cutoff (for 'default')\n"); printf("\n"); bench_print_global_options(stdout); } int main(int argc, char **argv) { global_options(&argc, &argv); if (argc < 4) { print_help(); m4ri_die(""); } struct ple_params params; params.k = atoi(argv[1]); params.m = atoi(argv[2]); params.n = atoi(argv[3]); if (argc >= 5) params.algorithm = argv[4]; else params.algorithm = (char*)"default"; if (argc >= 6) params.c = atoi(argv[5]); else params.c = 0; if(argc >= 7) { print_help(); m4ri_die(""); } srandom(17); unsigned long long data[2]; run_bench(run, (void*)¶ms, data, 2); double cc_per_op = ((double)data[1])/ ( (double)params.m * (double)params.n * powl((double)params.r,__M4RIE_OMEGA-2) ); printf("e: %2d, m: %5d, n: %5d, algorithm: %10s, cutoff: %10d, 
cpu cycles: %10llu, cc/(mnr^0.807): %.5lf, wall time: %lf\n", params.k, params.m, params.n, params.algorithm, params.c, data[1], cc_per_op, data[0] / 1000000.0); } libm4rie-20130416/bench/bench_smallops.cc000066400000000000000000000057051212302364300200450ustar00rootroot00000000000000#include #include #include "benchmarking.h" struct smallops_params { rci_t k; rci_t m; rci_t n; char const *algorithm; }; int run_mzed_add(void *_p, unsigned long long *data, int *data_len) { struct smallops_params *p = (struct smallops_params *)_p; *data_len = 2; gf2e *ff = gf2e_init(irreducible_polynomials[p->k][1]); mzed_t *A = mzed_init(ff,p->m,p->n); mzed_randomize(A); mzed_t *B = mzed_init(ff,p->m,p->n); mzed_randomize(B); mzed_t *C = mzed_init(ff,p->m,p->n); data[0] = walltime(0); data[1] = cpucycles(); mzed_add(C, A, B); data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); mzed_free(A); mzed_free(B); mzed_free(C); gf2e_free(ff); return 0; } int run_mzed_slice(void *_p, unsigned long long *data, int *data_len) { struct smallops_params *p = (struct smallops_params *)_p; *data_len = 2; gf2e *ff = gf2e_init(irreducible_polynomials[p->k][1]); mzed_t *A = mzed_init(ff,p->m,p->n); mzed_randomize(A); mzd_slice_t *a = mzd_slice_init(ff,p->m,p->n); data[0] = walltime(0); data[1] = cpucycles(); mzed_slice(a, A); data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); mzed_free(A); mzd_slice_free(a); gf2e_free(ff); return 0; } int run_mzed_cling(void *_p, unsigned long long *data, int *data_len) { struct smallops_params *p = (struct smallops_params *)_p; *data_len = 2; gf2e *ff = gf2e_init(irreducible_polynomials[p->k][1]); mzd_slice_t *a = mzd_slice_init(ff,p->m,p->n); mzd_slice_randomize(a); mzed_t *A = mzed_init(ff, p->m, p->n); data[0] = walltime(0); data[1] = cpucycles(); mzed_cling(A, a); data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); mzed_free(A); mzd_slice_free(a); gf2e_free(ff); return 0; } void print_help() { printf("bench_smallops:\n\n"); 
printf("REQUIRED\n"); printf(" e -- integer between 2 and 10\n"); printf(" m -- integer > 0\n"); printf(" n -- integer > 0\n"); printf(" what -- mze_cling\n"); printf(" mzed_slice\n"); printf(" mzed_add\n"); printf("\n"); bench_print_global_options(stdout); } int main(int argc, char **argv) { global_options(&argc, &argv); if (argc < 5) { print_help(); m4ri_die(""); } struct smallops_params params; params.k = atoi(argv[1]); params.m = atoi(argv[2]); params.n = atoi(argv[3]); srandom(17); unsigned long long data[2]; if(strcmp(argv[4],"mzed_slice") == 0) { run_bench(run_mzed_slice, (void*)¶ms, data, 2); } else if(strcmp(argv[4],"mzed_cling") == 0) { run_bench(run_mzed_cling, (void*)¶ms, data, 2); } else if(strcmp(argv[4],"mzed_add") == 0) { run_bench(run_mzed_add, (void*)¶ms, data, 2); } double cc_per_op = ((double)data[1])/ ( (double)params.m * (double)params.n ); printf("%s: m: %5d, n: %5d, cpu cycles: %10llu, cc/(mn): %.5lf, wall time: %lf\n", argv[4], params.m, params.n, data[1], cc_per_op, data[0] / 1000000.0); } libm4rie-20130416/bench/bench_trsm.cc000066400000000000000000000111111212302364300171640ustar00rootroot00000000000000#include #include #include "benchmarking.h" struct elim_params { rci_t e; rci_t m; rci_t n; char const *matrix_type; char const *direction; char const *algorithm; rci_t cutoff; }; int run_mzed(void *_p, unsigned long long *data, int *data_len) { struct elim_params *p = (struct elim_params *)_p; *data_len = 2; gf2e *ff = gf2e_init(irreducible_polynomials[p->e][1]); mzed_t *A = mzed_init(ff,p->m,p->m); mzed_randomize(A); const int bitmask = (1<degree)-1; for(rci_t i=0; im; i++) { while(mzed_read_elem(A, i, i) == 0) { mzed_write_elem(A, i, i, random()&bitmask) ; } }; mzed_t *B = mzed_init(ff,p->m,p->n); mzed_randomize(B); data[0] = walltime(0); data[1] = cpucycles(); if (strcmp(p->direction,"lower_left")==0) { if(strcmp(p->algorithm,"naive")==0) mzed_trsm_lower_left_naive(A, B); else if(strcmp(p->algorithm,"newton-john")==0) 
mzed_trsm_lower_left_newton_john(A, B); else _mzed_trsm_lower_left(A, B, p->cutoff); } else if (strcmp(p->direction,"upper_left")==0) { if(strcmp(p->algorithm,"naive")==0) mzed_trsm_upper_left_naive(A, B); else if(strcmp(p->algorithm,"newton-john")==0) mzed_trsm_upper_left_newton_john(A, B); else _mzed_trsm_upper_left(A, B, p->cutoff); } else { m4ri_die("unknown direction"); } data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); mzed_free(A); mzed_free(B); gf2e_free(ff); return 0; } int run_mzd_slice(void *_p, unsigned long long *data, int *data_len) { struct elim_params *p = (struct elim_params *)_p; *data_len = 2; gf2e *ff = gf2e_init(irreducible_polynomials[p->e][1]); mzd_slice_t *A = mzd_slice_init(ff,p->m,p->m); mzd_slice_randomize(A); const int bitmask = (1<degree)-1; for(rci_t i=0; im; i++) { while(mzd_slice_read_elem(A, i, i) == 0) { mzd_slice_write_elem(A, i, i, random()&bitmask) ; } }; mzd_slice_t *B = mzd_slice_init(ff,p->m,p->n); mzd_slice_randomize(B); data[0] = walltime(0); data[1] = cpucycles(); if (strcmp(p->direction,"lower_left")==0) { if(strcmp(p->algorithm,"naive")==0) mzd_slice_trsm_lower_left_naive(A, B); else if(strcmp(p->algorithm,"newton-john")==0) mzd_slice_trsm_lower_left_newton_john(A, B); else _mzd_slice_trsm_lower_left(A, B, p->cutoff); } else if (strcmp(p->direction,"upper_left")==0) { if(strcmp(p->algorithm,"naive")==0) mzd_slice_trsm_upper_left_naive(A, B); else if(strcmp(p->algorithm,"newton-john")==0) mzd_slice_trsm_upper_left_newton_john(A, B); else _mzd_slice_trsm_upper_left(A, B, p->cutoff); } else { m4ri_die("unknown direction"); } data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); mzd_slice_free(A); mzd_slice_free(B); gf2e_free(ff); return 0; } void print_help() { printf("bench_trsm:\n\n"); printf("REQUIRED\n"); printf(" e -- integer between 2 and 10\n"); printf(" m -- integer > 0, dimension of U or L\n"); printf(" n -- integer > 0\n"); printf(" matrix_type - mzed_t\n"); printf(" - mzd_slice_t\n"); 
printf(" direction - lower_left\n"); printf(" - upper_left\n"); printf(" algorithm -- default\n"); printf(" naive\n"); printf(" c -- cutoff (for 'default')\n"); printf("\n"); bench_print_global_options(stdout); } int main(int argc, char **argv) { global_options(&argc, &argv); if (argc < 6) { print_help(); m4ri_die(""); } struct elim_params params; params.e = atoi(argv[1]); params.m = atoi(argv[2]); params.n = atoi(argv[3]); params.matrix_type = argv[4]; params.direction = argv[5]; if (strcmp(params.direction,"lower_left") != 0 && strcmp(params.direction,"upper_left") != 0) m4ri_die("not implemented."); if (argc >= 7) params.algorithm = argv[6]; else params.algorithm = (char*)"default"; if (argc >= 8) params.cutoff = atoi(argv[7]); else params.cutoff = MZED_TRSM_CUTOFF; srandom(17); unsigned long long data[2]; if (strcmp(params.matrix_type,"mzed_t") == 0) run_bench(run_mzed, (void*)¶ms, data, 2); else if(strcmp(params.matrix_type,"mzd_slice_t") == 0) run_bench(run_mzd_slice, (void*)¶ms, data, 2); else m4ri_die("unknown type '%s'",params.matrix_type); double cc_per_op = ((double)data[1])/ ( powl((double)params.m,__M4RIE_OMEGA-1) * params.n ); printf("e: %2d, m: %5d, n: %5d, cutoff: %4d, cpu cycles: %10llu, cc/(mmn^0.807): %.5lf, wall time: %lf\n", params.e, params.m, params.n, params.cutoff, data[1], cc_per_op, data[0] / 1000000.0); } libm4rie-20130416/bench/benchmarking.cc000066400000000000000000000476201212302364300175060ustar00rootroot00000000000000/* * benchmarking.cc * * Benchmark engine. * * Copyright (C) 2011 Carlo Wood * RSA-1024 0x624ACAD5 1997-01-26 Sign & Encrypt * Fingerprint16 = 32 EC A7 B6 AC DB 65 A6 F6 F6 55 DD 1C DC FF 61 * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /* * Example usage: * * ./bench_elimination -s 0 -m 4 -c 90 -a 0.005 -d -t 30 -n 1000 1000 1000 * * would run at most 30 seconds (-t) or 1000 times (-n), whichever comes * first, or stop after the real average of wall time (-s 0) falls with 90% * certainty (-c) in a range that is +/- 0.005 times the observed mean (-a: accuracry), * but no sooner than that at least 4 (-m: minimum) measurements have been * done. It would also print (-d: dump) each measurement (0:microseconds 1:cpuclocks). * * Example output. * * 2416 6441500 * 2376 6335490 * 2360 6294450 * 2361 6295280 * 2371 6321440 * 2350 6266740 * 2362 6298700 * 2386 6362520 * 2344 6249890 * 2347 6260450 * 2346 6254590 * Total running time: 0.103 seconds. * Virtual time (s): Sample size: 11; mean: 0.002365; standard deviation: 0.000021 * Virtual time (s): 90% confidence interval: +/- 0.000012 (0.5%): [0.002354..0.002377] * * The last three lines can be suppressed by passing the option -q (quiet). */ #include "config.h" #ifdef HAVE_LIBPAPI #define _GNU_SOURCE #include // papi.h needs caddr_t #include #include #endif #include #include #include #include #include #include #include #include "benchmarking.h" #include "m4ri/m4ri.h" enum { C80, C90, C95, C98, C99 }; /* * Command line option decoding */ int bench_quiet = 0; // Set if -q is used. int bench_dump = 0; // Set if -d is used. int bench_minimum = 2; // Minimum number of measurements. Set with -m . int bench_maximum = 1000; // Maximum number of measurements. Set with -n . unsigned long long bench_maxtime = 60000000; // Maximum number of microseconds to run. Set with -t , in seconds (floating point). 
double bench_accuracy = 0.01; // The +/- range (where 1.0 is 100%) within that we want the real population mean to be with the given confidence. Set with -a int bench_confidence_index = C99; // The confidence that the real mean is within the given (or found) range. int bench_stats = 1; // The counter used for statistics (0 = realtime, 1 = cpuclocks). Set with -s . int bench_dump_counter = -1; // The counter to dump (see bench_stats). Set with -d . If not given all counters are dumped. char const* progname; // Set to argv[0]. /* * Command line option used by bench_packedmatrix.c */ uint64_t bench_count = 0; // Can be set by -x , otherwise a reasonable default is being used. #ifdef HAVE_LIBPAPI int bench_disregard_L2_misses = 0; // Set if -2 is used. /* * PAPI events being counted. */ int papi_events[32] = { PAPI_TOT_CYC, /* Total cycles. This must always be the first entry. */ }; int papi_array_len = 1; int bench_PAPI_L2_TCM_index; char* papi_event_name(int event) { // PAPI needs to be initialized before calling PAPI_event_code_to_name. if (PAPI_is_initialized() == PAPI_NOT_INITED) { int res = PAPI_library_init(PAPI_VER_CURRENT); if (res != PAPI_OK && res != PAPI_VER_CURRENT) { fprintf(stderr, "%s: PAPI_library_init: error code %d %s\n", progname, res, PAPI_strerror(res)); m4ri_die("PAPI failed to initialize.\n"); } } static char buf[PAPI_MAX_STR_LEN]; int res = PAPI_event_code_to_name(event, buf); if (res) snprintf(buf, PAPI_MAX_STR_LEN, "", event); return buf; } int papi_add_event(char const* event_name) { // PAPI needs to be initialized before calling PAPI_event_name_to_code. 
if (PAPI_is_initialized() == PAPI_NOT_INITED) { int res = PAPI_library_init(PAPI_VER_CURRENT); if (res != PAPI_OK && res != PAPI_VER_CURRENT) { fprintf(stderr, "%s: PAPI_library_init: error code %d %s\n", progname, res, PAPI_strerror(res)); m4ri_die("PAPI failed to initialize.\n"); } } int event; int res = PAPI_event_name_to_code((char*)event_name, &event); if (res != PAPI_OK) { if (res == PAPI_ENOEVNT) fprintf(stderr, "%s: %s: No such event.\n", progname, event_name); else fprintf(stderr, "%s: PAPI_event_name_to_code(\"%s\"): %s\n", progname, event_name, PAPI_strerror(res)); return res; } int found = 0; for (int nv = 0; nv < papi_array_len; ++nv) { if (papi_events[nv] == event) { found = 1; break; } } if (!found) papi_events[papi_array_len++] = event; return 0; } void papi_add_events(char* event_names) { char* tmpptr; char* name = strtok_r(event_names, ", ", &tmpptr); while (name) { papi_add_event(name); name = strtok_r(NULL, ", ", &tmpptr); } } #endif // HAVE_LIBPAPI int global_options(int* argcp, char*** argvp) { int result = 0; progname = (*argvp)[0]; while((*argcp) > 1) { if ((*argvp)[1][0] != '-' || (*argvp)[1][1] == '\0' || (*argvp)[1][2] != '\0') return result; switch((*argvp)[1][1]) { case 'd': bench_dump = 1; if (isdigit((*argvp)[2][0])) { ++*argvp; --*argcp; bench_dump_counter = atoi((*argvp)[1]); } break; case 'q': bench_quiet = 1; break; #ifdef HAVE_LIBPAPI case '2': { bench_disregard_L2_misses = 1; if (papi_add_event("PAPI_L2_TCM")) { fprintf(stderr, "%s: Ignoring -2: Level 2 cache misses cannot be detected with the current set of PAPI events (-p).\n", progname); bench_disregard_L2_misses = 0; } for (int nv = 0; nv < papi_array_len; ++nv) { if (papi_events[nv] == PAPI_L2_TCM) { bench_PAPI_L2_TCM_index = nv + 1; // +1 for in data[] inserted virtual time at index 0. 
break; } } break; } case 'p': { ++*argvp; --*argcp; papi_add_events((*argvp)[1]); break; } #endif case 'm': ++*argvp; --*argcp; bench_minimum = atoi((*argvp)[1]); break; case 'n': ++*argvp; --*argcp; bench_maximum = atoi((*argvp)[1]); break; case 't': ++*argvp; --*argcp; bench_maxtime = 1000000 * strtod((*argvp)[1], NULL); break; case 'a': ++*argvp; --*argcp; bench_accuracy = strtod((*argvp)[1], NULL); break; case 'c': { ++*argvp; --*argcp; int confidence = atoi((*argvp)[1]); switch (confidence) { case 80: bench_confidence_index = C80; break; case 90: bench_confidence_index = C90; break; case 95: bench_confidence_index = C95; break; case 98: bench_confidence_index = C98; break; case 99: bench_confidence_index = C99; break; default: m4ri_die("The only possible confidence percentages are 80, 90, 95, 98 and 99%\n"); break; } break; } case 'x': ++*argvp; --*argcp; bench_count = atoll((*argvp)[1]); break; case 's': ++*argvp; --*argcp; bench_stats = atoi((*argvp)[1]); break; default: return -1; } ++result; ++*argvp; --*argcp; } return result; } void bench_print_global_options(FILE* out) { fprintf(out, "OPTIONS\n"); fprintf(out, " -m Do at least number of measurements. Default 2.\n"); fprintf(out, " -n Do at most number of measurements. Default 1000.\n"); fprintf(out, " -t Stop after seconds. Default 60.0 seconds.\n"); fprintf(out, " -a Stop after has been reached. Default 0.01 (= 1%%).\n"); fprintf(out, " -c Stop when accuracy has been reached with this confidence. Default 99 (%%).\n"); fprintf(out, " -s Counter to perform statistic over (0: realtime, 1: cpuclocks. Default: 1).\n"); fprintf(out, " -x Call function times in the inner most loop (calls per measurement).\n"); fprintf(out, " -d [] Dump measurements. Dump all or only when given.\n"); fprintf(out, " -q Quiet. 
Suppress printing of statistics.\n"); #ifdef HAVE_LIBPAPI fprintf(out, " -2 Disregard measurements with any level 2 cache misses.\n"); fprintf(out, " -p [,,...]\n"); fprintf(out, " Count and report the given events. The list is comma or space separated,\n"); fprintf(out, " for example -p \"PAPI_TOT_INS PAPI_L1_DCM\".\n"); fprintf(out, " Run `papi_event_chooser PRESET PAPI_TOT_CYC [PAPI_*]` for more events.\n"); #endif } /* * vector implementation * * vector_create: Create vector of size s. * vector_destruct: Destruct vector. * vector_resize: Resize internal allocation. * vector_size: Return number of elements. * vector_pushback: Add one element at the end. * vector_get: Get element at position index. */ struct vector_st { size_t alloc_size; size_t size; double* data; }; typedef struct vector_st* vector; vector vector_create(size_t s) { vector v = (vector)malloc(sizeof(struct vector_st)); v->alloc_size = s; v->data = s ? (double*)malloc(sizeof(double) * s) : NULL; v->size = 0; return v; } void vector_destruct(vector v) { free(v->data); free(v); } void vector_resize(vector v, size_t s) { v->data = (double*)realloc(v->data, sizeof(double) * s); v->alloc_size = s; if (v->size > v->alloc_size) v->size = v->alloc_size; } static inline size_t vector_size(vector v) { return v->size; } void vector_pushback(vector v, double d) { if (++(v->size) > v->alloc_size) vector_resize(v, v->alloc_size * 2); v->data[v->size - 1] = d; } static inline double vector_get(vector v, int index) { return v->data[index]; } /* * Normal distribution * * normal_calculate: Calculate the mean and standard deviation of the data in vector v. * * Returns -1 on failure (not enough data points), 0 otherwise. */ struct normal_st { int size; double mean; double sigma; }; typedef struct normal_st normal; int normal_calculate(vector v, normal* dist, double multiplier) { dist->size = vector_size(v); if (dist->size < 2) return -1; // Calculate the sum of all data. 
double sum = 0; for (int i = 0; i < dist->size; ++i) sum += vector_get(v, i) * multiplier; dist->mean = sum / dist->size; // Calculate the sum of the square of all differences with mean. sum = 0; for (int i = 0; i < dist->size; ++i) { double delta = vector_get(v, i) * multiplier - dist->mean; sum += delta * delta; } dist->sigma = sqrt(sum / (dist->size - 1)); return 0; } /* * T-Table */ static float student_t[5][34] = { { 3.078, 1.886, 1.638, 1.533, 1.476, 1.440, 1.415, 1.397, 1.383, 1.372, 1.363, 1.356, 1.350, 1.345, 1.341, 1.337, 1.333, 1.330, 1.328, 1.325, 1.323, 1.321, 1.319, 1.318, 1.316, 1.315, 1.314, 1.313, 1.311, 1.310, 1.303, 1.296, 1.289, 1.282 }, { 6.314, 2.920, 2.353, 2.132, 2.015, 1.943, 1.895, 1.860, 1.833, 1.812, 1.796, 1.782, 1.771, 1.761, 1.753, 1.746, 1.740, 1.734, 1.729, 1.725, 1.721, 1.717, 1.714, 1.711, 1.708, 1.706, 1.703, 1.701, 1.699, 1.697, 1.684, 1.671, 1.658, 1.645 }, { 12.706, 4.303, 3.182, 2.776, 2.571, 2.447, 2.365, 2.306, 2.262, 2.228, 2.201, 2.179, 2.160, 2.145, 2.131, 2.120, 2.110, 2.101, 2.093, 2.086, 2.080, 2.074, 2.069, 2.064, 2.060, 2.056, 2.052, 2.048, 2.045, 2.042, 2.021, 2.000, 1.980, 1.960 }, { 31.821, 6.965, 4.541, 3.747, 3.365, 3.143, 2.998, 2.896, 2.821, 2.764, 2.718, 2.681, 2.650, 2.624, 2.602, 2.583, 2.567, 2.552, 2.539, 2.528, 2.518, 2.508, 2.500, 2.492, 2.485, 2.479, 2.473, 2.467, 2.462, 2.457, 2.423, 2.390, 2.358, 2.326 }, { 63.657, 9.925, 5.841, 4.604, 4.032, 3.707, 3.499, 3.355, 3.250, 3.169, 3.106, 3.055, 3.012, 2.977, 2.947, 2.921, 2.898, 2.878, 2.861, 2.845, 2.831, 2.819, 2.807, 2.797, 2.787, 2.779, 2.771, 2.763, 2.756, 2.750, 2.704, 2.660, 2.617, 2.576 } }; static float student_t_certainty[5] = { 0.2, 0.1, 0.05, 0.02, 0.01 }; // Two-tails. 
/*
 * t_table
 *
 * Return the two-tailed Student-t critical value for row confidence_index of
 * student_t and the given number of degrees of freedom.
 *
 * 1..30 degrees of freedom are read straight from the table. Values up to 120
 * are interpolated with the parabola through the three surrounding anchor
 * points; larger values follow an exponential decay towards the
 * infinite-freedom column. A value below 1 is treated as 1 so that the lookup
 * can never index in front of the table.
 */
static float t_table(int confidence_index, int freedoms)
{
  if (freedoms < 1)
    freedoms = 1;
  if (freedoms <= 30)
    return student_t[confidence_index][freedoms - 1];

  double a, b, y1, y2, y3;
  long x1, x2;
  long x3 = 0;
  int i;

  // Pick the anchor columns (30, 40, 60, 120, infinity) around freedoms.
  if (freedoms <= 60)
  {
    i = 29;
    x1 = 30;
    x2 = 40;
    x3 = 60;
  }
  else if (freedoms <= 120)
  {
    i = 30;
    x1 = 40;
    x2 = 60;
    x3 = 120;
  }
  else
  {
    i = 31;
    x1 = 60;
    x2 = 120;
    /* x3 = infinity */
  }
  y1 = student_t[confidence_index][i];
  y2 = student_t[confidence_index][i + 1];
  y3 = student_t[confidence_index][i + 2];

  if (freedoms <= 120)
  {
    // Parabola a*x^2 + b*x + c through (x1,y1), (x2,y2), (x3,y3).
    double c, d;
    d = (x1 * x1 * (x3 - x2) + x2 * x2 * (x1 - x3) + x3 * x3 * (x2 - x1));
    a = - (x1 * (y3 - y2) + x2 * (y1 - y3) + x3 * (y2 - y1)) / d;
    b = (x1 * x1 * (y3 - y2) + x2 * x2 * (y1 - y3) + x3 * x3 * (y2 - y1)) / d;
    c = y2 - a * x2 * x2 - b * x2;
    return (a * freedoms * freedoms + b * freedoms + c);
  }

  // y3 + exp(a*x + b) through (x1,y1) and (x2,y2), with asymptote y3.
  double ln1, ln2;
  ln1 = log(y2 - y3);
  ln2 = log(y1 - y3);
  a = - (ln1 - ln2) / (x1 - x2);
  b = (x1 * ln1 - x2 * ln2) / (x1 - x2);
  return (y3 + exp(a * freedoms + b));
}

/*
 * walltime
 *
 * Return the wall-clock time in microseconds, minus t0. Seconds are counted
 * from the first call so the result stays small; pass 0 to take a time stamp
 * and pass that stamp back later to obtain the elapsed time.
 */

unsigned long long walltime(unsigned long long t0)
{
  static time_t base_sec;
  struct timeval tp;
  gettimeofday(&tp, NULL);
  if (__M4RI_UNLIKELY(base_sec == 0))
    base_sec = tp.tv_sec;
  // Widen before multiplying: a 32-bit time_t would overflow after ~35 minutes.
  return (unsigned long long)(tp.tv_sec - base_sec) * 1000000ULL + (unsigned long long)tp.tv_usec - t0;
}

/*
 * Printing doubles.
 *
 * bench_precision: Choose the number of decimals to print for values whose
 *                  standard deviation is sigma (0 up to 12 decimals).
 * print_double:    Print d to stdout with precision decimals. Nothing is
 *                  printed when precision is outside 0..12.
 */

int bench_precision(double sigma)
{
  if (sigma < 1E-10)
    return 12;
  int log_sigma = (int)log10(sigma);   // truncates towards zero
  if (log_sigma >= 2)
    return 0;
  return 2 - log_sigma;
}

void print_double(double d, int precision)
{
  if (precision < 0 || precision > 12)
    return;
  printf("%.*f", precision, d);
}

/*
 * run_bench
 *
 * Benchmark main loop.
*/ int run_bench( int (*f)(void* params, unsigned long long* data, int *data_len), void* params, unsigned long long* data, int data_len) { double const CONFIDENCE = 1.0 - student_t_certainty[bench_confidence_index]; unsigned long long data_sum[32]; memset(data_sum, 0, sizeof(data_sum)); data_len = MIN(data_len, sizeof(data_sum) / sizeof(unsigned long long)); vector stats_data = vector_create(128); normal stats; #ifdef HAVE_LIBPAPI int total_calls = 0; #endif if (!bench_count) bench_count = 1; unsigned long long start_walltime = walltime(0); for (int n = 1; n <= bench_maximum; ++n) { if (!bench_quiet && !bench_dump) { printf("."); fflush(stdout); } do { int res = f(params, data, &data_len); if (res < 0) m4ri_die("benchmark function failed with exit code: %d\n", res); #ifdef HAVE_LIBPAPI ++total_calls; #endif } #ifdef HAVE_LIBPAPI while(bench_disregard_L2_misses && data[bench_PAPI_L2_TCM_index]); #else while(0); #endif if (bench_dump) { if (bench_dump_counter >= 0 && bench_dump_counter < data_len) printf("%llu", data[bench_dump_counter]); else { printf("%llu", data[0]); for (int nv = 1; nv < data_len; ++nv) printf(" %llu", data[nv]); } printf("\n"); fflush(stdout); } vector_pushback(stats_data, data[bench_stats]); for (int nv = 0; nv < data_len; ++nv) data_sum[nv] += data[nv]; if (n >= bench_minimum && normal_calculate(stats_data, &stats, (bench_stats == 0) ? 0.000001 : (1.0 / bench_count)) == 0) { double standard_error = stats.sigma / sqrt(stats.size); double critical_value = t_table(bench_confidence_index, stats.size - 1); // Stop when the real mean lays with CONFIDENCE in the range [mean * (1 - bench_accuracy), mean * (1 + bench_accuracy)]. // or when we're already running bench_maxtime seconds. 
if (standard_error * critical_value / stats.mean <= bench_accuracy || walltime(start_walltime) > bench_maxtime) break; } } for (int nv = 0; nv < data_len; ++nv) data[nv] = (data_sum[nv] + stats.size / 2) / stats.size; if (!bench_quiet) { if (!bench_quiet && !bench_dump) printf("\n"); printf("Total running time: %6.3f seconds.\n", walltime(start_walltime) / 1000000.0); #ifdef HAVE_LIBPAPI if (bench_disregard_L2_misses) printf("Samples disregarded because of level 2 cache misses: %d\n", total_calls - stats.size); #endif int precision = bench_precision(stats.sigma); #ifdef HAVE_LIBPAPI if (bench_stats) printf("%s: ", papi_event_name(papi_events[bench_stats - 1])); else printf("Virtual time (s): "); #endif printf("Sample size: %d; mean: ", stats.size); print_double(stats.mean, precision); printf("; standard deviation: "); print_double(stats.sigma, precision); printf("\n"); #ifdef HAVE_LIBPAPI if (bench_stats) printf("%s: ", papi_event_name(papi_events[bench_stats - 1])); else printf("Virtual time (s): "); #endif double standard_error = stats.sigma / sqrt(stats.size); double critical_value = t_table(bench_confidence_index, stats.size - 1); double accuracy = standard_error * critical_value; printf("%2.0f%% confidence interval: +/- ", CONFIDENCE * 100); print_double(accuracy, precision); printf(" (%.1f%%): [", accuracy / stats.mean * 100); print_double(stats.mean - accuracy, precision); printf(".."); print_double(stats.mean + accuracy, precision); printf("]\n"); } vector_destruct(stats_data); return data_len; } /* * Randomize */ // The same as m4ri_random_word. Duplicated here because it's // not available in older revisions that we want to benchmark against. word bench_random_word() { // random() only returns 31 bits, so we need three calls. 
word a0 = random(); word a1 = random(); word a2 = random(); word v = a0 ^ (a1 << 24) ^ a2 << 48; #ifdef BENCH_RANDOM_REVERSE v = ((v >> 1) & 0x5555555555555555ULL) | ((v & 0x5555555555555555ULL) << 1); v = ((v >> 2) & 0x3333333333333333ULL) | ((v & 0x3333333333333333ULL) << 2); v = ((v >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((v & 0x0F0F0F0F0F0F0F0FULL) << 4); v = ((v >> 8) & 0x00FF00FF00FF00FFULL) | ((v & 0x00FF00FF00FF00FFULL) << 8); v = ((v >> 16) & 0x0000FFFF0000FFFFULL) | ((v & 0x0000FFFF0000FFFFULL) << 16); v = (v >> 32) | (v << 32); #endif return v; } /* * Random number generator */ static uint64_t bench_random_M; static uint64_t bench_random_modulo; void bench_random_init(uint64_t modulo) { // Set bench_random_M to the largest multiple of modulo, minus one, that fits in an uint64_t. // A modulo of zero is interpreted as 2^64, and thus returns 0xffffffffffffffff. bench_random_M = modulo ? -modulo / modulo * modulo - 1 : -1; bench_random_M += modulo; bench_random_modulo = modulo; } // Returns a uniformly distributed random number in the range [0, bench_random_modulo>. uint64_t bench_random() { for(;;) { word R = bench_random_word(); if (R <= bench_random_M) return R % bench_random_modulo; } } libm4rie-20130416/bench/benchmarking.h000066400000000000000000000015571212302364300173470ustar00rootroot00000000000000#ifndef BENCHMARKETING_H #define BENCHMARKETING_H #include /* * Command line options. See benchmarking.h for documentation. 
*/ extern int bench_quiet; extern int bench_dump; extern int bench_minimum; extern int bench_maximum; extern unsigned long long bench_maxtime; extern double bench_accuracy; extern int bench_confidence_index; extern char const* progname; extern uint64_t bench_count; unsigned long long walltime(unsigned long long t0); int global_options(int* argcp, char*** argvp); void bench_print_global_options(FILE*); int run_bench( int (*f)(void* params, unsigned long long* data, int *data_len), void* params, unsigned long long* data, int data_len); #ifdef HAVE_LIBPAPI extern int papi_events[]; extern int papi_array_len; char* papi_event_name(int event); #endif #define __M4RIE_OMEGA 2.80735492205760 #endif //BENCHMARKETING_H libm4rie-20130416/bench/cpucycles-20060326/000077500000000000000000000000001212302364300174305ustar00rootroot00000000000000libm4rie-20130416/bench/cpucycles-20060326/alpha.c000066400000000000000000000027351212302364300206700ustar00rootroot00000000000000/* cpucycles/alpha.c version 20060316 D. J. Bernstein Public domain. */ #include #include #include static long long tod(void) { struct timeval t; gettimeofday(&t,(struct timezone *) 0); return t.tv_sec * (long long) 1000000 + t.tv_usec; } static long long rpcc(void) { unsigned long long t; asm volatile("rpcc %0" : "=r"(t)); return t & 0xffffffff; } static long long firstrpcc; static long long firsttod; static long long lastrpcc; static long long lasttod; static double mhz = 0; static void init(void) { firstrpcc = rpcc(); firsttod = tod(); do { lastrpcc = rpcc(); lasttod = tod(); } while (lasttod - firsttod < 10000); lastrpcc -= firstrpcc; lastrpcc &= 0xffffffff; lasttod -= firsttod; mhz = (double) lastrpcc / (double) lasttod; } long long cpucycles_alpha(void) { double x; long long y; if (!mhz) init(); lastrpcc = rpcc(); lasttod = tod(); lastrpcc -= firstrpcc; lastrpcc &= 0xffffffff; lasttod -= firsttod; /* Number of cycles since firstrpcc is lastrpcc + 2^32 y for unknown y. 
*/ /* Number of microseconds since firsttod is lasttod. */ x = (lasttod * mhz - lastrpcc) * 0.00000000023283064365386962890625; y = x; while (x > y + 0.5) y += 1; while (x < y - 0.5) y -= 1; y *= 4294967296ULL; lastrpcc += y; mhz = (double) lastrpcc / (double) lasttod; return firstrpcc + lastrpcc; } long long cpucycles_alpha_persecond(void) { if (!mhz) init(); return 1000000.0 * mhz; } libm4rie-20130416/bench/cpucycles-20060326/alpha.h000066400000000000000000000007171212302364300206730ustar00rootroot00000000000000/* cpucycles alpha.h version 20060318 D. J. Bernstein Public domain. */ #ifndef CPUCYCLES_alpha_h #define CPUCYCLES_alpha_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_alpha(void); extern long long cpucycles_alpha_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "alpha" #define cpucycles cpucycles_alpha #define cpucycles_persecond cpucycles_alpha_persecond #endif #endif libm4rie-20130416/bench/cpucycles-20060326/amd64cpuinfo.c000066400000000000000000000011101212302364300220640ustar00rootroot00000000000000#include #include long long cpucycles_amd64cpuinfo(void) { unsigned long long result; asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" : "=a" (result) :: "%rdx"); return result; } long long cpucycles_amd64cpuinfo_persecond(void) { FILE *f; double result; int s; f = fopen("/proc/cpuinfo","r"); if (!f) return 0; for (;;) { s = fscanf(f,"cpu MHz : %lf",&result); if (s > 0) break; if (s == 0) s = fscanf(f,"%*[^\n]\n"); if (s < 0) { result = 0; break; } } fclose(f); return 1000000.0 * result; } libm4rie-20130416/bench/cpucycles-20060326/amd64cpuinfo.h000066400000000000000000000010071212302364300220760ustar00rootroot00000000000000/* cpucycles amd64cpuinfo.h version 20060318 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_amd64cpuinfo_h #define CPUCYCLES_amd64cpuinfo_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_amd64cpuinfo(void); extern long long cpucycles_amd64cpuinfo_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "amd64cpuinfo" #define cpucycles cpucycles_amd64cpuinfo #define cpucycles_persecond cpucycles_amd64cpuinfo_persecond #endif #endif libm4rie-20130416/bench/cpucycles-20060326/amd64tscfreq.c000066400000000000000000000006371212302364300221050ustar00rootroot00000000000000#include #include long long cpucycles_amd64tscfreq(void) { unsigned long long result; asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" : "=a" (result) :: "%rdx"); return result; } long long cpucycles_amd64tscfreq_persecond(void) { long result = 0; size_t resultlen = sizeof(long); sysctlbyname("machdep.tsc_freq",&result,&resultlen,0,0); return result; } libm4rie-20130416/bench/cpucycles-20060326/amd64tscfreq.h000066400000000000000000000010071212302364300221020ustar00rootroot00000000000000/* cpucycles amd64tscfreq.h version 20060318 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_amd64tscfreq_h #define CPUCYCLES_amd64tscfreq_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_amd64tscfreq(void); extern long long cpucycles_amd64tscfreq_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "amd64tscfreq" #define cpucycles cpucycles_amd64tscfreq #define cpucycles_persecond cpucycles_amd64tscfreq_persecond #endif #endif libm4rie-20130416/bench/cpucycles-20060326/clockmonotonic.c000066400000000000000000000012761212302364300226230ustar00rootroot00000000000000#include #include #include #include #include #include static double cpufrequency = 0; static void init(void) { long result = 0; size_t resultlen = sizeof(long); sysctlbyname("machdep.tsc_freq",&result,&resultlen,0,0); cpufrequency = result; } long long cpucycles_clockmonotonic(void) { double result; struct timespec t; if (!cpufrequency) init(); clock_gettime(CLOCK_MONOTONIC,&t); result = t.tv_nsec; result *= 0.000000001; result += (double) t.tv_sec; result *= cpufrequency; return result; } long long cpucycles_clockmonotonic_persecond(void) { if (!cpufrequency) init(); return cpufrequency; } libm4rie-20130416/bench/cpucycles-20060326/clockmonotonic.h000066400000000000000000000010271212302364300226220ustar00rootroot00000000000000/* cpucycles clockmonotonic.h version 20060318 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_clockmonotonic_h #define CPUCYCLES_clockmonotonic_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_clockmonotonic(void); extern long long cpucycles_clockmonotonic_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "clockmonotonic" #define cpucycles cpucycles_clockmonotonic #define cpucycles_persecond cpucycles_clockmonotonic_persecond #endif #endif libm4rie-20130416/bench/cpucycles-20060326/compile000077500000000000000000000012731212302364300210110ustar00rootroot00000000000000#!/bin/sh case "$COMPILER" in suncc) case "$ARCHITECTURE" in 64) /opt/SUNWspro/bin/cc -xarch=v9 -O2 "$@" ;; 32) /opt/SUNWspro/bin/cc -xarch=v8 -O2 "$@" ;; *) /opt/SUNWspro/bin/cc -O2 "$@" ;; esac ;; ibmcc) case "$ARCHITECTURE" in 64) xlc -q64 -O2 "$@" ;; 32) xlc -q32 -O2 "$@" ;; *) xlc -O2 "$@" ;; esac ;; hpcc) case "$ARCHITECTURE" in 64) /opt/ansic/bin/cc +DD64 -O2 "$@" ;; 32) /opt/ansic/bin/cc +DD32 -O2 "$@" ;; *) /opt/ansic/bin/cc -O2 "$@" ;; esac ;; *) case "$ARCHITECTURE" in 64) gcc -m64 -O2 "$@" ;; 32) gcc -m32 -O2 "$@" ;; *) gcc -O2 "$@" ;; esac ;; esac libm4rie-20130416/bench/cpucycles-20060326/cpucycles.html000066400000000000000000000316651212302364300223230ustar00rootroot00000000000000 cpucycles: counting CPU cycles

cpucycles: counting CPU cycles

A C or C++ program can call cpucycles() to receive a long long cycle count. The program has to
     #include "cpucycles.h"
and link to cpucycles.o. The program can look at the constant string cpucycles_implementation to see which implementation of cpucycles it's using. The program can also call cpucycles_persecond() to receive a long long estimate of the number of cycles per second.

Here's how to create cpucycles.h and cpucycles.o:

     wget http://ebats.cr.yp.to/cpucycles-20060326.tar.gz
     gunzip < cpucycles-20060326.tar.gz | tar -xf -
     cd cpucycles-20060326
     sh do
The do script creates cpucycles.h and cpucycles.o. It also prints one line of output showing the implementation selected, the number of cycles per second, a double-check of the number of cycles per second, and the differences between several adjacent calls to the cpucycles() function.

Some systems have multiple incompatible formats for executable programs. The most important reason is that some CPUs (the Athlon 64, for example, and the UltraSPARC) have two incompatible modes, a 32-bit mode and a 64-bit mode. On these systems, you can run

     env ARCHITECTURE=32 sh do
to create a 32-bit cpucycles.o or
     env ARCHITECTURE=64 sh do
to create a 64-bit cpucycles.o.

Notes on accuracy

Benchmarking tools are encouraged to record several timings of a function: call cpucycles(), function(), cpucycles(), function(), etc., and then print one line reporting the differences between successive cpucycles() results. The median of several differences is much more stable than the average.

Cycle counts continue to increase while other programs are running, while the operating system is handling an interrupt (for example, an incoming network packet), etc. This won't affect the median of several timings of a fast function---the function usually won't be interrupted---but it can affect the median of several timings of a slow function. Hopefully a benchmarking machine isn't running other programs.

On dual-CPU systems (and dual-core systems such as the Athlon 64 X2), the CPUs often don't have synchronized cycle counters, so a process that switches CPUs can have its cycle counts jump forwards or backwards. I've never seen this affect the median of several timings.

Some CPUs dynamically reduce CPU speed to save power, but deliberately keep their cycle counters running at full speed, the idea being that measuring time is more important than measuring cycles. Hopefully a benchmarking machine won't enter power-saving mode.

Cycle counts are occasionally off by a multiple of 2^32 on some CPUs, as discussed below. I've never seen this affect the median of several timings.

The estimate returned by cpucycles_persecond() may become more accurate after cpucycles() has been called repeatedly.

Implementations

alpha. The Alpha's built-in cycle-counting function counts cycles modulo 2^32. cpucycles usually manages to fix this by calling gettimeofday (which takes a large but low-variance number of cycles) and automatically estimating the chip speed. In extreme situations the resulting cycle counts could still be off by a multiple of 2^32.

Results on td161: alpha 499845359 499838717 423 360 336 349 353 348 469 329 348 345 348 345 348 345 348 345 348 345 348 348 348 345 348 345 348 345 348 348 348 345 348 345 348 345 348 348 348 345 348 345 348 345 348 348 348 345 348 345 348 345 348 348 348 468 318 348 345 348 345 348 345 348 345 348

amd64cpuinfo. cpucycles uses the CPU's RDTSC instruction to count cycles, and reads /proc/cpuinfo to see the kernel's estimate of cycles per second.

Results on dancer with ARCHITECTURE=64 (default): amd64cpuinfo 2002653000 2002526765 22 9 9 8 8 17 6 10 5 9 8 8 8 17 6 10 5 9 8 8 8 17 6 10 5 9 8 8 8 17 6 10 5 9 8 8 11 14 15 28 10 8 9 12 23 106 10 8 8 8 8 8 8 17 6 10 5 9 8 8 8 17 6 10

amd64tscfreq. cpucycles uses the CPU's RDTSC instruction to count cycles, and uses sysctlbyname("machdep.tsc_freq",...) to see the kernel's estimate of cycles per second.

clockmonotonic. Backup option, using the POSIX clock_gettime(CLOCK_MONOTONIC) function to count nanoseconds and using sysctlbyname("machdep.tsc_freq",...) to see the kernel's estimate of cycles per second. This often has much worse than microsecond precision.

Results on whisper (artificially induced): clockmonotonic 1298904202 1298866469 2177 1815 2177 2177 1814 2177 2178 2177 1814 2178 2177 1814 2177 2177 1815 2177 2177 1814 2177 2179 1813 2178 2177 1815 2177 2177 1814 2177 2177 2177 1815 2178 2177 1813 2178 2177 1815 2177 2177 1814 2177 2177 1815 2177 2177 1814 2177 2179 2177 1814 2177 2177 2177 1815 2177 2177 1814 2177 2178 1814 2178 2177 1814 2177

gettimeofday. Backup option, using the POSIX gettimeofday() function to count microseconds and /proc/cpuinfo to see the kernel's estimate of cycles per second. This often has much worse than microsecond precision.

Results on dancer (artificially induced) with ARCHITECTURE=32: gettimeofday 2002653000 2002307748 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 4005 0 4005 2003 0 4005 2003 2002 2003 2003 2002 2003 2003 2002 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 2003 2002 2003 4005 0 2003 4005 0 4006 2002 2003

Results on dancer (artificially induced) with ARCHITECTURE=64 (default): gettimeofday 2002653000 2002293956 2560 1792 2048 1792 2048 2304 1792 2048 1792 0 2048 2304 2048 1792 1792 2048 0 2304 2048 1792 2048 1792 2304 0 2048 1792 2048 1792 2048 0 2304 2048 1792 1792 2048 2304 2048 0 1792 2048 1792 2304 2048 1792 2048 0 1792 2560 1792 1792 2048 1792 0 2560 25600 2048 1792 2560 1792 0 2048 1792 2048 2304

hppapstat. cpucycles uses the CPU's MFCTL %cr16 instruction to count cycles, and pstat(PSTAT_PROCESSOR,...) to see the kernel's estimate of cycles per second.

Results on hp400: hppapstat 440000000 439994653 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11

powerpcaix. cpucycles uses the CPU's MFTB instruction to count ``time base''; uses /usr/sbin/lsattr -E -l proc0 -a frequency to see the kernel's estimate of cycles per second; and spends some time comparing MFTB to gettimeofday() to figure out the number of time-base counts per second.

I've seen a 533MHz PowerPC G4 (7410) with a 16-cycle time base; a 668MHz POWER RS64 IV (SStar) system with a 1-cycle time base; a 1452MHz POWER with an 8-cycle time base; and a 2000MHz PowerPC G5 (970) with a 60-cycle time base.

Results on tigger: powerpcaix 1452000000 1451981436 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64 56 64 64 64

powerpclinux. cpucycles uses the CPU's MFTB instruction to count ``time base''; reads /proc/cpuinfo to see the kernel's estimate of cycles per second; and spends some time comparing MFTB to gettimeofday() to figure out the number of time-base counts per second.

Results on gggg: powerpclinux 533000000 532650134 48 32 48 32 32 48 32 32 48 32 32 48 32 32 48 32 32 48 32 32 32 48 32 32 48 32 32 48 32 32 48 32 32 48 32 32 32 48 32 32 48 32 32 48 32 32 48 32 32 48 32 32 32 48 32 32 48 32 32 48 32 32 48 32

powerpcmacos. cpucycles uses the mach_absolute_time function to count ``time base''; uses sysctlbyname("hw.cpufrequency",...) to see the kernel's estimate of cycles per second; and uses sysctlbyname("hw.tbfrequency",...) to see the kernel's estimate of time-base counts per second.

Results on geespaz with ARCHITECTURE=32 (default): powerpcmacos 2000000000 1999891801 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60

Results on geespaz with ARCHITECTURE=64: powerpcmacos 2000000000 1999896339 420 60 60 60 60 60 60 0 60 60 60 60 60 60 60 60 0 60 60 60 60 60 60 60 60 60 0 60 60 60 60 60 60 60 60 60 0 60 60 60 60 60 60 60 0 60 60 60 60 60 60 60 60 0 60 60 60 60 60 60 60 60 0 60

sparc32psrinfo. cpucycles uses the CPU's RDTICK instruction in 32-bit mode to count cycles, and runs /usr/sbin/psrinfo -v to see the kernel's estimate of cycles per second.

Results on icarus with ARCHITECTURE=32 (default): sparc32psrinfo 900000000 899920056 297 23 23 18 22 23 18 17 22 18 17 22 23 18 17 129 17 17 17 17 17 17 17 97 17 17 17 17 17 17 17 85 17 17 17 17 17 17 17 97 17 17 17 17 17 17 17 85 17 17 17 17 17 17 17 97 17 17 17 17 17 17 17 85

Results on wessel with ARCHITECTURE=32 (default): sparc32psrinfo 900000000 899997269 39 23 18 22 18 25 72 17 22 18 17 22 23 26 71 17 17 17 17 17 17 17 85 17 17 17 17 17 17 17 97 17 17 17 17 17 17 17 85 17 17 17 17 17 17 17 97 17 17 17 17 17 17 17 85 17 17 17 17 17 17 17 109 17

sparcpsrinfo. cpucycles uses the CPU's RDTICK instruction in 64-bit mode to count cycles, and runs /usr/sbin/psrinfo -v to see the kernel's estimate of cycles per second.

Results on icarus with ARCHITECTURE=64: sparcpsrinfo 900000000 899920264 289 12 12 12 12 12 12 19 12 113 19 12 12 12 12 12 12 130 12 12 12 12 12 12 12 144 12 12 12 12 12 12 12 144 12 12 12 12 12 12 12 144 12 12 12 12 12 12 12 144 12 12 12 12 12 12 12 144 12 12 12 12 12 12

Results on wessel with ARCHITECTURE=64: sparcpsrinfo 900000000 899997032 29 19 12 19 19 19 12 12 123 12 12 12 12 12 12 12 174 12 12 12 12 12 12 12 174 12 12 12 12 12 12 12 174 12 12 12 12 12 12 12 174 12 12 12 12 12 12 12 174 12 12 12 12 12 12 12 174 12 12 12 12 12 12 12

x86cpuinfo. cpucycles uses the CPU's RDTSC instruction to count cycles, and reads /proc/cpuinfo to see the kernel's estimate of cycles per second. There have been reports of the 64-bit cycle counters on some x86 CPUs being occasionally off by 2^32; cpucycles makes no attempt to fix this.

Results on cruncher: x86cpuinfo 132957999 132951052 60 36 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32

Results on dali: x86cpuinfo 448882000 448881565 49 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45

Results on dancer with ARCHITECTURE=32: x86cpuinfo 2002653000 2002538651 26 11 9 11 10 17 11 10 10 10 9 10 9 12 9 173 11 10 10 10 10 17 11 10 10 10 9 10 9 17 11 10 10 10 9 10 9 17 11 10 10 10 9 10 9 17 11 10 10 10 9 10 9 17 11 10 10 10 9 10 9 17 11 10

Results on fireball: x86cpuinfo 1894550999 1894188944 104 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88

Results on neumann: x86cpuinfo 999534999 999456935 49 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44

Results on rzitsc: x86cpuinfo 2799309000 2799170567 132 96 100 104 100 100 96 96 96 100 96 108 104 104 112 96 112 96 108 96 112 96 96 96 100 112 120 100 96 100 104 112 96 96 96 88 96 128 108 96 116 96 100 100 108 96 100 96 108 96 104 100 112 96 100 96 100 100 88 108 100 108 92 96

Results on shell: x86cpuinfo 3391548999 3391341751 108 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88

Results on thoth: x86cpuinfo 900447000 900028758 67 19 18 18 19 188 16 16 16 19 19 18 19 147 16 16 16 19 19 17 16 16 16 16 16 19 19 17 16 16 16 16 16 19 19 17 16 16 16 16 16 19 19 18 19 156 16 16 16 19 19 18 19 147 16 16 16 19 19 18 19 147 16 16

x86tscfreq. cpucycles uses the CPU's RDTSC instruction to count cycles, and uses sysctlbyname("machdep.tsc_freq",...) to see the kernel's estimate of cycles per second.

Results on whisper: x86tscfreq 1298904202 1298892874 72 72 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53

Version

This is the cpucycles-20060326.html web page. This web page is in the public domain. libm4rie-20130416/bench/cpucycles-20060326/do000066400000000000000000000021701212302364300177550ustar00rootroot00000000000000#!/bin/sh output="cpucycles.o cpucycles.h" cleanup="test cpucycles-impl.o cpucycles-impl.h cpucycles-impl.c" exec 2>do.notes rm -f $output $cleanup ( echo amd64tscfreq gcc echo amd64cpuinfo gcc echo x86tscfreq gcc echo x86cpuinfo gcc echo powerpclinux gcc echo powerpcmacos gcc echo powerpcaix gcc echo powerpcaix ibmcc echo sparcpsrinfo gcc echo sparcpsrinfo suncc echo sparc32psrinfo gcc echo sparc32psrinfo suncc echo hppapstat gcc echo hppapstat hpcc echo alpha gcc echo clockmonotonic gcc echo gettimeofday gcc ) | ( while read name compiler do echo ===== Trying $name.c with $compiler... >&2 rm -f $cleanup cp $name.c cpucycles-impl.c || continue cp $name.h cpucycles-impl.h || continue env COMPILER=$compiler ./compile -c cpucycles-impl.c || continue env COMPILER=$compiler ./compile -o test test.c cpucycles-impl.o || continue ./test || continue echo ===== Success. Using $name.c. >&2 mv cpucycles-impl.o cpucycles.o mv cpucycles-impl.h cpucycles.h rm -f $cleanup exit 0 done echo ===== Giving up. >&2 rm -f $output $cleanup exit 111 ) libm4rie-20130416/bench/cpucycles-20060326/do.notes000066400000000000000000000004471212302364300211110ustar00rootroot00000000000000===== Trying amd64tscfreq.c with gcc... cpucycles-impl.o: In function `cpucycles_amd64tscfreq_persecond': cpucycles-impl.c:(.text+0x3a): undefined reference to `sysctlbyname' collect2: error: ld returned 1 exit status ===== Trying amd64cpuinfo.c with gcc... ===== Success. Using amd64cpuinfo.c. 
libm4rie-20130416/bench/cpucycles-20060326/gettimeofday.c000066400000000000000000000014651212302364300222630ustar00rootroot00000000000000#include #include #include #include static double cpufrequency = 0; static void init(void) { FILE *f; double result; int s; f = fopen("/proc/cpuinfo","r"); if (!f) return; for (;;) { s = fscanf(f,"cpu MHz : %lf",&result); if (s > 0) break; if (s == 0) s = fscanf(f,"%*[^\n]\n"); if (s < 0) { result = 0; break; } } fclose(f); cpufrequency = 1000000.0 * result; } long long cpucycles_gettimeofday(void) { double result; struct timeval t; if (!cpufrequency) init(); gettimeofday(&t,(struct timezone *) 0); result = t.tv_usec; result *= 0.000001; result += (double) t.tv_sec; result *= cpufrequency; return result; } long long cpucycles_gettimeofday_persecond(void) { if (!cpufrequency) init(); return cpufrequency; } libm4rie-20130416/bench/cpucycles-20060326/gettimeofday.h000066400000000000000000000010071212302364300222600ustar00rootroot00000000000000/* cpucycles gettimeofday.h version 20060318 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_gettimeofday_h #define CPUCYCLES_gettimeofday_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_gettimeofday(void); extern long long cpucycles_gettimeofday_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "gettimeofday" #define cpucycles cpucycles_gettimeofday #define cpucycles_persecond cpucycles_gettimeofday_persecond #endif #endif libm4rie-20130416/bench/cpucycles-20060326/hppapstat.c000066400000000000000000000010421212302364300215750ustar00rootroot00000000000000#include #include #include #include #include #include long long cpucycles_hppapstat(void) { register long long result; _MFCTL(16,result); return result; } long long cpucycles_hppapstat_persecond(void) { struct pst_processor pst; union pstun pu; double result; pu.pst_processor = &pst; if (pstat(PSTAT_PROCESSOR,pu,sizeof(pst),1,0) < 0) return 0; result = pst.psp_iticksperclktick; result *= (double) sysconf(_SC_CLK_TCK); return result; } libm4rie-20130416/bench/cpucycles-20060326/hppapstat.h000066400000000000000000000007571212302364300216160ustar00rootroot00000000000000/* cpucycles hppapstat.h version 20060319 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_hppapstat_h #define CPUCYCLES_hppapstat_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_hppapstat(void); extern long long cpucycles_hppapstat_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "hppapstat" #define cpucycles cpucycles_hppapstat #define cpucycles_persecond cpucycles_hppapstat_persecond #endif #endif libm4rie-20130416/bench/cpucycles-20060326/powerpcaix.c000066400000000000000000000030351212302364300217560ustar00rootroot00000000000000#include #include #include #include #include static long myround(double u) { long result = u; while (result + 0.5 < u) result += 1; while (result - 0.5 > u) result -= 1; return result; } static long long microseconds(void) { struct timeval t; gettimeofday(&t,(struct timezone *) 0); return t.tv_sec * (long long) 1000000 + t.tv_usec; } static long long timebase(void) { unsigned long high; unsigned long low; unsigned long newhigh; unsigned long long result; asm volatile( "Lcpucycles:mftbu %0;mftb %1;mftbu %2;cmpw %0,%2;bne Lcpucycles" : "=r" (high), "=r" (low), "=r" (newhigh) ); result = high; result <<= 32; result |= low; return result; } static double cpufrequency = 0; static long tbcycles = 0; static void init(void) { FILE *f; long long tb0; long long us0; long long tb1; long long us1; f = popen("/usr/sbin/lsattr -E -l proc0 -a frequency","r"); if (!f) return; if (fscanf(f,"frequency %lf",&cpufrequency) < 1) cpufrequency = 0; pclose(f); if (!cpufrequency) return; tb0 = timebase(); us0 = microseconds(); do { tb1 = timebase(); us1 = microseconds(); } while (us1 - us0 < 10000); if (tb1 <= tb0) return; tb1 -= tb0; us1 -= us0; tbcycles = myround((cpufrequency * 0.000001 * (double) us1) / (double) tb1); } long long cpucycles_powerpcaix(void) { if (!tbcycles) init(); return timebase() * tbcycles; } long long cpucycles_powerpcaix_persecond(void) { if (!tbcycles) init(); return cpufrequency; } 
libm4rie-20130416/bench/cpucycles-20060326/powerpcaix.h000066400000000000000000000007671212302364300217740ustar00rootroot00000000000000/* cpucycles powerpcaix.h version 20060318 D. J. Bernstein Public domain. */ #ifndef CPUCYCLES_powerpcaix_h #define CPUCYCLES_powerpcaix_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_powerpcaix(void); extern long long cpucycles_powerpcaix_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "powerpcaix" #define cpucycles cpucycles_powerpcaix #define cpucycles_persecond cpucycles_powerpcaix_persecond #endif #endif libm4rie-20130416/bench/cpucycles-20060326/powerpclinux.c000066400000000000000000000032341212302364300223350ustar00rootroot00000000000000#include #include #include #include #include static long myround(double u) { long result = u; while (result + 0.5 < u) result += 1; while (result - 0.5 > u) result -= 1; return result; } static long long microseconds(void) { struct timeval t; gettimeofday(&t,(struct timezone *) 0); return t.tv_sec * (long long) 1000000 + t.tv_usec; } static long long timebase(void) { unsigned long high; unsigned long low; unsigned long newhigh; unsigned long long result; asm volatile( "Lcpucycles:mftbu %0;mftb %1;mftbu %2;cmpw %0,%2;bne Lcpucycles" : "=r" (high), "=r" (low), "=r" (newhigh) ); result = high; result <<= 32; result |= low; return result; } static double cpufrequency = 0; static long tbcycles = 0; /* init: read the "clock : ... MHz" entry of /proc/cpuinfo into cpufrequency (converted to Hz), then sample timebase() against microseconds() for at least 10000 microseconds and set tbcycles to the rounded number of CPU cycles per timebase tick. Returns early without assigning tbcycles if the file cannot be opened, no clock entry is found, or the timebase did not advance. The function is void, so every early exit is a plain return. */ static void init(void) { FILE *f; int s; long long tb0; long long us0; long long tb1; long long us1; f = fopen("/proc/cpuinfo","r"); if (!f) return; for (;;) { s = fscanf(f," clock : %lf MHz",&cpufrequency); if (s > 0) break; if (s == 0) s = fscanf(f,"%*[^\n]\n"); if (s < 0) { cpufrequency = 0; break; } } fclose(f); if (!cpufrequency) return; cpufrequency *= 1000000.0; tb0 = timebase(); us0 = microseconds(); do { tb1 = timebase(); us1 = microseconds(); } while (us1 - us0 < 10000); if (tb1 <= tb0) return; tb1 -= tb0; us1
-= us0; tbcycles = myround((cpufrequency * 0.000001 * (double) us1) / (double) tb1); } long long cpucycles_powerpclinux(void) { if (!tbcycles) init(); return timebase() * tbcycles; } long long cpucycles_powerpclinux_persecond(void) { if (!tbcycles) init(); return cpufrequency; } libm4rie-20130416/bench/cpucycles-20060326/powerpclinux.h000066400000000000000000000010071212302364300223360ustar00rootroot00000000000000/* cpucycles powerpclinux.h version 20060319 D. J. Bernstein Public domain. */ #ifndef CPUCYCLES_powerpclinux_h #define CPUCYCLES_powerpclinux_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_powerpclinux(void); extern long long cpucycles_powerpclinux_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "powerpclinux" #define cpucycles cpucycles_powerpclinux #define cpucycles_persecond cpucycles_powerpclinux_persecond #endif #endif libm4rie-20130416/bench/cpucycles-20060326/powerpcmacos.c000066400000000000000000000017521212302364300223030ustar00rootroot00000000000000#include #include #include #define timebase mach_absolute_time static int cpumib[2] = { CTL_HW, HW_CPU_FREQ } ; static int tbmib[2] = { CTL_HW, HW_TB_FREQ } ; static long myround(double u) { long result = u; while (result + 0.5 < u) result += 1; while (result - 0.5 > u) result -= 1; return result; } static long tbcycles = 0; static void init(void) { int cpufrequency = 0; size_t cpufrequencylen = sizeof(int); int tbfrequency = 0; size_t tbfrequencylen = sizeof(int); sysctl(cpumib,2,&cpufrequency,&cpufrequencylen,0,0); sysctl(tbmib,2,&tbfrequency,&tbfrequencylen,0,0); if (tbfrequency > 0) tbcycles = myround((double) cpufrequency / (double) tbfrequency); } long long cpucycles_powerpcmacos(void) { if (!tbcycles) init(); return timebase() * tbcycles; } long long cpucycles_powerpcmacos_persecond(void) { int result = 0; size_t resultlen = sizeof(int); sysctl(cpumib,2,&result,&resultlen,0,0); return result; }
libm4rie-20130416/bench/cpucycles-20060326/powerpcmacos.h000066400000000000000000000010071212302364300223010ustar00rootroot00000000000000/* cpucycles powerpcmacos.h version 20060319 D. J. Bernstein Public domain. */ #ifndef CPUCYCLES_powerpcmacos_h #define CPUCYCLES_powerpcmacos_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_powerpcmacos(void); extern long long cpucycles_powerpcmacos_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "powerpcmacos" #define cpucycles cpucycles_powerpcmacos #define cpucycles_persecond cpucycles_powerpcmacos_persecond #endif #endif libm4rie-20130416/bench/cpucycles-20060326/sparc32psrinfo.c000066400000000000000000000011611212302364300224510ustar00rootroot00000000000000#include #include long long cpucycles_sparc32psrinfo(void) { long long result; asm volatile( ".word 0x93410000;.word 0x91327020;mov %%g0,%0" : "=r" (result) : : "%g0" ); return result; } long long cpucycles_sparc32psrinfo_persecond(void) { FILE *f; double result; int s; f = popen("/usr/sbin/psrinfo -v","r"); if (!f) return 0; for (;;) { s = fscanf(f," The %*s processor operates at %lf MHz",&result); if (s > 0) break; if (s == 0) s = fscanf(f,"%*[^\n]\n"); if (s < 0) { result = 0; break; } } pclose(f); return 1000000.0 * result; } libm4rie-20130416/bench/cpucycles-20060326/sparc32psrinfo.h000066400000000000000000000010271212302364300224570ustar00rootroot00000000000000/* cpucycles sparc32psrinfo.h version 20060319 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_sparc32psrinfo_h #define CPUCYCLES_sparc32psrinfo_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_sparc32psrinfo(void); extern long long cpucycles_sparc32psrinfo_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "sparc32psrinfo" #define cpucycles cpucycles_sparc32psrinfo #define cpucycles_persecond cpucycles_sparc32psrinfo_persecond #endif #endif libm4rie-20130416/bench/cpucycles-20060326/sparcpsrinfo.c000066400000000000000000000010541212302364300223050ustar00rootroot00000000000000#include #include long long cpucycles_sparcpsrinfo(void) { long long result; asm volatile("rd %%tick,%0" : "=r" (result)); return result; } long long cpucycles_sparcpsrinfo_persecond(void) { FILE *f; double result; int s; f = popen("/usr/sbin/psrinfo -v","r"); if (!f) return 0; for (;;) { s = fscanf(f," The %*s processor operates at %lf MHz",&result); if (s > 0) break; if (s == 0) s = fscanf(f,"%*[^\n]\n"); if (s < 0) { result = 0; break; } } pclose(f); return 1000000.0 * result; } libm4rie-20130416/bench/cpucycles-20060326/sparcpsrinfo.h000066400000000000000000000010071212302364300223100ustar00rootroot00000000000000/* cpucycles sparcpsrinfo.h version 20060318 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_sparcpsrinfo_h #define CPUCYCLES_sparcpsrinfo_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_sparcpsrinfo(void); extern long long cpucycles_sparcpsrinfo_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "sparcpsrinfo" #define cpucycles cpucycles_sparcpsrinfo #define cpucycles_persecond cpucycles_sparcpsrinfo_persecond #endif #endif libm4rie-20130416/bench/cpucycles-20060326/test.c000066400000000000000000000026631212302364300205620ustar00rootroot00000000000000#include #include #include #include #include "cpucycles-impl.h" static long long tod(void) { struct timeval t; gettimeofday(&t,(struct timezone *) 0); return t.tv_sec * (long long) 1000000 + t.tv_usec; } long long todstart; long long todend; long long cpustart; long long cpuend; long long t[1001]; /* Self-test for the selected cpucycles implementation. Exits with status 100 after printing diagnostics to stderr if 1001 consecutive cpucycles() samples ever decrease, if the first and last samples are equal, or if cpucycles_persecond() is not positive. Otherwise prints the implementation name, the reported cycles per second, the cycles per second measured across sleep(1) using gettimeofday, and 64 successive sample differences, and returns 0. The return type is spelled out because implicit int is not valid since C99. */ int main(void) { int i; for (i = 0;i <= 1000;++i) t[i] = cpucycles(); for (i = 0;i < 1000;++i) if (t[i] > t[i + 1]) { fprintf(stderr,"t[%d] = %lld\n",i,t[i]); fprintf(stderr,"t[%d] = %lld\n",i + 1,t[i + 1]); fprintf(stderr,"cpucycles_persecond() = %lld\n",cpucycles_persecond()); return 100; } if (t[0] == t[1000]) { fprintf(stderr,"t[%d] = %lld\n",0,t[0]); fprintf(stderr,"t[%d] = %lld\n",1000,t[1000]); fprintf(stderr,"cpucycles_persecond() = %lld\n",cpucycles_persecond()); return 100; } if (cpucycles_persecond() <= 0) { fprintf(stderr,"cpucycles_persecond() = %lld\n",cpucycles_persecond()); return 100; } todstart = tod(); cpustart = cpucycles(); sleep(1); todend = tod(); cpuend = cpucycles(); todend -= todstart; cpuend -= cpustart; for (i = 0;i <= 1000;++i) t[i] = cpucycles(); printf("%s",cpucycles_implementation); printf(" %lld",cpucycles_persecond()); printf(" %lld",(long long) (((double) cpuend) * 1000000.0 / (double) todend)); for (i = 0;i < 64;++i) printf(" %lld",t[i + 1] - t[i]); printf("\n"); return 0; }
libm4rie-20130416/bench/cpucycles-20060326/x86cpuinfo.c000066400000000000000000000010151212302364300216020ustar00rootroot00000000000000#include #include long long cpucycles_x86cpuinfo(void) { long long result; asm volatile(".byte 15;.byte 49" : "=A" (result)); return result; } long long cpucycles_x86cpuinfo_persecond(void) { FILE *f; double result; int s; f = fopen("/proc/cpuinfo","r"); if (!f) return 0; for (;;) { s = fscanf(f,"cpu MHz : %lf",&result); if (s > 0) break; if (s == 0) s = fscanf(f,"%*[^\n]\n"); if (s < 0) { result = 0; break; } } fclose(f); return 1000000.0 * result; } libm4rie-20130416/bench/cpucycles-20060326/x86cpuinfo.h000066400000000000000000000007671212302364300216240ustar00rootroot00000000000000/* cpucycles x86cpuinfo.h version 20060318 D. J. Bernstein Public domain. */ #ifndef CPUCYCLES_x86cpuinfo_h #define CPUCYCLES_x86cpuinfo_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_x86cpuinfo(void); extern long long cpucycles_x86cpuinfo_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "x86cpuinfo" #define cpucycles cpucycles_x86cpuinfo #define cpucycles_persecond cpucycles_x86cpuinfo_persecond #endif #endif libm4rie-20130416/bench/cpucycles-20060326/x86tscfreq.c000066400000000000000000000005511212302364300216120ustar00rootroot00000000000000#include #include long long cpucycles_x86tscfreq(void) { long long result; asm volatile(".byte 15;.byte 49" : "=A" (result)); return result; } long long cpucycles_x86tscfreq_persecond(void) { long result = 0; size_t resultlen = sizeof(long); sysctlbyname("machdep.tsc_freq",&result,&resultlen,0,0); return result; } libm4rie-20130416/bench/cpucycles-20060326/x86tscfreq.h000066400000000000000000000007671212302364300216300ustar00rootroot00000000000000/* cpucycles x86tscfreq.h version 20060318 D. J. Bernstein Public domain. 
*/ #ifndef CPUCYCLES_x86tscfreq_h #define CPUCYCLES_x86tscfreq_h #ifdef __cplusplus extern "C" { #endif extern long long cpucycles_x86tscfreq(void); extern long long cpucycles_x86tscfreq_persecond(void); #ifdef __cplusplus } #endif #ifndef cpucycles_implementation #define cpucycles_implementation "x86tscfreq" #define cpucycles cpucycles_x86tscfreq #define cpucycles_persecond cpucycles_x86tscfreq_persecond #endif #endif libm4rie-20130416/configure.ac000066400000000000000000000026531212302364300157530ustar00rootroot00000000000000
AC_INIT(m4rie,20130416)

AC_CANONICAL_HOST

AC_CONFIG_SRCDIR(m4rie/newton_john.c)
AC_CONFIG_MACRO_DIR([m4])

AM_INIT_AUTOMAKE

AC_PROG_LIBTOOL
AC_PROG_INSTALL

AC_CONFIG_HEADERS(m4rie/config.h)

AC_PROG_CXX

# The library is written in C99; stop early if the compiler cannot provide it.
AC_PROG_CC_C99()
if test "$ac_cv_prog_cc_c99" = "no"; then
    AC_MSG_ERROR([C99 support is required but not found.])
fi

# OpenMP support
AC_ARG_ENABLE([openmp], AS_HELP_STRING( [--enable-openmp],[add support for OpenMP multicore support.]))
AS_IF([test "x$enable_openmp" = "xyes"], [
    AX_OPENMP()
])
AC_SUBST(OPENMP_CFLAGS)

# Debugging support
AC_ARG_ENABLE(debug, [ --enable-debug Enable assert() statements for debugging.])
if test "x$enable_debug" = x"yes"; then
    DEBUG_FLAGS="-g"
    AC_SUBST(DEBUG_FLAGS)
else
    AC_DEFINE(NDEBUG,1,[Define whether debugging is enabled])
fi

# Optional installation prefix of the M4RI library.
AC_ARG_WITH(m4ri, AS_HELP_STRING([--with-m4ri@<:@=VALUE@:>@], [Optional prefix for M4RI library]),[m4rie_config_m4riprefix=$withval])

# The expansion is quoted on purpose: with an unquoted empty value the shell
# runs "test -n" with a single argument, which is always true, and the
# M4RI search paths below would be set to /lib and /include.
if test -n "$m4rie_config_m4riprefix"; then
    M4RIE_M4RI_PREFIX=${m4rie_config_m4riprefix}
    AC_SUBST(M4RIE_M4RI_PREFIX)
    M4RIE_M4RI_LDFLAGS="-L${m4rie_config_m4riprefix}/lib"
    AC_SUBST(M4RIE_M4RI_LDFLAGS)
    M4RIE_M4RI_CFLAGS="-I${m4rie_config_m4riprefix}/include"
    AC_SUBST(M4RIE_M4RI_CFLAGS)
fi

# Reuse the SIMD and OpenMP flags that the installed M4RI was built with.
AX_M4RI_CFLAGS()
M4RI_CFLAGS=$ax_cv_m4ri_cflags
AC_SUBST(M4RI_CFLAGS)

RELEASE="AC_PACKAGE_VERSION"
AC_SUBST(RELEASE)

AC_PROG_MAKE_SET
AC_CONFIG_FILES([Makefile bench/Makefile])
AC_OUTPUT
libm4rie-20130416/gf2e_cxx/000077500000000000000000000000001212302364300151645ustar00rootroot00000000000000libm4rie-20130416/gf2e_cxx/finite_field_givaro.h000066400000000000000000000044101212302364300213240ustar00rootroot00000000000000/****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GPL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include #include #include namespace M4RIE { #if GIVARO_VERSION < 30400 || GIVARO_VERSION >= 196608 // old Givaro versions used 0x03xxyy class FiniteField: public GFqDom { public: FiniteField(const unsigned int e) : GFqDom(2, e){}; #else class FiniteField: public Givaro::GFqDom { public: FiniteField(const unsigned int e) : Givaro::GFqDom(2, e){}; #endif unsigned int log2pol(int x) { return _log2pol[x]; }; unsigned int pol2log(int x) { return _pol2log[x]; }; }; }; static inline gf2e *gf2e_init_givgfq(M4RIE::FiniteField *givgfq) { word minpoly = givgfq->pol2log(1); unsigned int degree = givgfq->exponent(); for(unsigned int i = 0; imul((int&)minpoly, (int)givgfq->pol2log(2) , (int)minpoly); } minpoly = givgfq->log2pol(minpoly); minpoly = minpoly ^ (1<pol2log((int)__mzd_read_bits(a->x, row, a->w*col, a->w)); }; static inline void mzed_write_elem_log(mzed_t *a, const size_t row, const size_t col, const int elem, M4RIE::FiniteField *ff) { __mzd_clear_bits(a->x, row, a->w*col, a->w); __mzd_xor_bits(a->x, row, a->w*col, a->w, ff->log2pol(elem)); }; static inline void mzed_add_elem_log(mzed_t *a, const
size_t row, const size_t col, const int elem, M4RIE::FiniteField *ff) { __mzd_xor_bits(a->x, row, a->w*col, a->w, ff->log2pol(elem)); }; libm4rie-20130416/m4/000077500000000000000000000000001212302364300137775ustar00rootroot00000000000000libm4rie-20130416/m4/ax_check_compiler_flags.m4000066400000000000000000000063771212302364300210710ustar00rootroot00000000000000# =========================================================================== # http://autoconf-archive.cryp.to/ax_check_compiler_flags.html # =========================================================================== # # SYNOPSIS # # AX_CHECK_COMPILER_FLAGS(FLAGS, [ACTION-SUCCESS], [ACTION-FAILURE]) # # DESCRIPTION # # Check whether the given compiler FLAGS work with the current language's # compiler, or whether they give an error. (Warnings, however, are # ignored.) # # ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on # success/failure. # # LAST MODIFICATION # # 2008-04-12 # # COPYLEFT # # Copyright (c) 2008 Steven G. Johnson # Copyright (c) 2008 Matteo Frigo # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation, either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. If not, see . # # As a special exception, the respective Autoconf Macro's copyright owner # gives unlimited permission to copy, distribute and modify the configure # scripts that are the output of Autoconf when processing the Macro. 
You # need not follow the terms of the GNU General Public License when using # or distributing such scripts, even though portions of the text of the # Macro appear in them. The GNU General Public License (GPL) does govern # all other use of the material that constitutes the Autoconf Macro. # # This special exception to the GPL applies to versions of the Autoconf # Macro released by the Autoconf Macro Archive. When you make and # distribute a modified version of the Autoconf Macro, you may extend this # special exception to the GPL to apply to your modified version as well. AC_DEFUN([AX_CHECK_COMPILER_FLAGS], [AC_PREREQ(2.59) dnl for _AC_LANG_PREFIX AC_MSG_CHECKING([whether _AC_LANG compiler accepts $1]) dnl Some hackery here since AC_CACHE_VAL can't handle a non-literal varname: AS_LITERAL_IF([$1], [AC_CACHE_VAL(AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1), [ ax_save_FLAGS=$[]_AC_LANG_PREFIX[]FLAGS _AC_LANG_PREFIX[]FLAGS="$1" AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=yes, AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=no) _AC_LANG_PREFIX[]FLAGS=$ax_save_FLAGS])], [ax_save_FLAGS=$[]_AC_LANG_PREFIX[]FLAGS _AC_LANG_PREFIX[]FLAGS="$1" AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], eval AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=yes, eval AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=no) _AC_LANG_PREFIX[]FLAGS=$ax_save_FLAGS]) eval ax_check_compiler_flags=$AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1) AC_MSG_RESULT($ax_check_compiler_flags) if test "x$ax_check_compiler_flags" = xyes; then m4_default([$2], :) else m4_default([$3], :) fi ])dnl AX_CHECK_COMPILER_FLAGS libm4rie-20130416/m4/ax_m4ri_flags.m4000066400000000000000000000022741212302364300167650ustar00rootroot00000000000000# SYNOPSIS # # AX_M4RI_CFLAGS # # DESCRIPTION # # Defines M4RI_CFLAGS which contains the CFLAGS used for building # the copy of M4RI we're linking against. 
# # LAST MODIFICATION # # 2011-10-03 # # COPYLEFT # # Copyright (c) 2009,2010 Martin Albrecht # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. AC_DEFUN([AX_M4RI_CFLAGS], [ AC_PREREQ(2.59) AC_REQUIRE([AC_PROG_CC]) AC_REQUIRE([AC_PROG_SED]) save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $M4RIE_M4RI_CFLAGS" AC_CACHE_CHECK(for M4RI CFLAGS, ax_cv_m4ri_cflags, [AC_RUN_IFELSE([AC_LANG_PROGRAM([[ #include #include ]], [[ FILE *f; f = fopen("conftest_m4ri_cflags", "w"); if (!f) return 1; fprintf(f,"%s %s",__M4RI_SIMD_CFLAGS, __M4RI_OPENMP_CFLAGS); fclose(f); return 0; ]])], [ax_cv_m4ri_cflags=`cat conftest_m4ri_cflags`; rm -f conftest_m4ri_cflags; CFLAGS="$save_CFLAGS"], [ax_cv_m4ri_cflags=""; rm -f conftest_m4ri_cflags; CFLAGS="$save_CFLAGS"], [ax_cv_m4ri_cflags=""; CFLAGS="$save_CFLAGS"])])]) ]) libm4rie-20130416/m4/ax_openmp.m4000066400000000000000000000103361212302364300162320ustar00rootroot00000000000000# =========================================================================== # http://autoconf-archive.cryp.to/ax_openmp.html # =========================================================================== # # SYNOPSIS # # AX_OPENMP([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) # # DESCRIPTION # # This macro tries to find out how to compile programs that use OpenMP a # standard API and set of compiler directives for parallel programming # (see http://www-unix.mcs/) # # On success, it sets the OPENMP_CFLAGS/OPENMP_CXXFLAGS/OPENMP_F77FLAGS # output variable to the flag (e.g. -omp) used both to compile *and* link # OpenMP programs in the current language. # # NOTE: You are assumed to not only compile your program with these flags, # but also link it with them as well. 
# # If you want to compile everything with OpenMP, you should set: # # CFLAGS="$CFLAGS $OPENMP_CFLAGS" # #OR# CXXFLAGS="$CXXFLAGS $OPENMP_CXXFLAGS" # #OR# FFLAGS="$FFLAGS $OPENMP_FFLAGS" # # (depending on the selected language). # # The user can override the default choice by setting the corresponding # environment variable (e.g. OPENMP_CFLAGS). # # ACTION-IF-FOUND is a list of shell commands to run if an OpenMP flag is # found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it is # not found. If ACTION-IF-FOUND is not specified, the default action will # define HAVE_OPENMP. # # LAST MODIFICATION # # 2008-04-12 # # COPYLEFT # # Copyright (c) 2008 Steven G. Johnson # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation, either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. If not, see . # # As a special exception, the respective Autoconf Macro's copyright owner # gives unlimited permission to copy, distribute and modify the configure # scripts that are the output of Autoconf when processing the Macro. You # need not follow the terms of the GNU General Public License when using # or distributing such scripts, even though portions of the text of the # Macro appear in them. The GNU General Public License (GPL) does govern # all other use of the material that constitutes the Autoconf Macro. # # This special exception to the GPL applies to versions of the Autoconf # Macro released by the Autoconf Macro Archive. 
When you make and # distribute a modified version of the Autoconf Macro, you may extend this # special exception to the GPL to apply to your modified version as well. AC_DEFUN([AX_OPENMP], [ AC_PREREQ(2.59) dnl for _AC_LANG_PREFIX AC_CACHE_CHECK([for OpenMP flag of _AC_LANG compiler], ax_cv_[]_AC_LANG_ABBREV[]_openmp, [save[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS ax_cv_[]_AC_LANG_ABBREV[]_openmp=unknown # Flags to try: -fopenmp (gcc), -openmp (icc), -mp (SGI & PGI), # -xopenmp (Sun), -omp (Tru64), -qsmp=omp (AIX), none ax_openmp_flags="-fopenmp -openmp -mp -xopenmp -omp -qsmp=omp none" if test "x$OPENMP_[]_AC_LANG_PREFIX[]FLAGS" != x; then ax_openmp_flags="$OPENMP_[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flags" fi for ax_openmp_flag in $ax_openmp_flags; do case $ax_openmp_flag in none) []_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[] ;; *) []_AC_LANG_PREFIX[]FLAGS="$save[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flag" ;; esac AC_TRY_LINK_FUNC(omp_set_num_threads, [ax_cv_[]_AC_LANG_ABBREV[]_openmp=$ax_openmp_flag; break]) done []_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS ]) if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" = "xunknown"; then m4_default([$2],:) else if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" != "xnone"; then OPENMP_[]_AC_LANG_PREFIX[]FLAGS=$ax_cv_[]_AC_LANG_ABBREV[]_openmp fi m4_default([$1], [AC_DEFINE(HAVE_OPENMP,1,[Define if OpenMP is enabled])]) fi ])dnl AX_OPENMP libm4rie-20130416/m4rie000077700000000000000000000000001212302364300151272srcustar00rootroot00000000000000libm4rie-20130416/src/000077500000000000000000000000001212302364300142465ustar00rootroot00000000000000libm4rie-20130416/src/Doxyfile000066400000000000000000002063051212302364300157620ustar00rootroot00000000000000# Doxyfile 1.7.1 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project # # All text after a hash (#) is considered a comment and will be ignored # The format is: # TAG = value [value, ...] 
# For lists items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (" ") #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # http://www.gnu.org/software/libiconv for the list of possible encodings. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded # by quotes) that should identify the project. PROJECT_NAME = M4RIE # The PROJECT_NUMBER tag can be used to enter a project or revision number. # This could be handy for archiving the generated documentation or # if some version control system is used. PROJECT_NUMBER = 0.20111004 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. OUTPUT_DIRECTORY = ../doc/ # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output # format and will distribute the generated files over these directories. # Enabling this option can be useful when feeding doxygen a huge amount of # source files, where putting all generated files in the same directory would # otherwise cause performance problems for the file system. CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. 
Doxygen will use this # information to generate all constant output in the proper language. # The default language is English, other supported languages are: # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English # messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will # include brief member descriptions after the members that are listed in # the file and class documentation (similar to JavaDoc). # Set to NO to disable this. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend # the brief description of a member or function before the detailed description. # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator # that is used to form the text in various listings. Each string # in this list, if found as the leading text of the brief description, will be # stripped from the text and the result after processing the whole list, is # used as the annotated text. Otherwise, the brief description is used as-is. # If left blank, the following values are used ("$name" is automatically # replaced with the name of the entity): "The $name class" "The $name widget" # "The $name file" "is" "provides" "specifies" "contains" # "represents" "a" "an" "the" ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # Doxygen will generate a detailed section even if there is only a brief # description. 
ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before file names in the file list and in the header files. If set # to NO the shortest path that makes the file name unique will be used. FULL_PATH_NAMES = YES # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag # can be used to strip a user-defined part of the path. Stripping is # only done if one of the specified strings matches the left-hand part of # the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the # path to strip. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of # the path mentioned in the documentation of a class, which tells # the reader which header file to include in order to use a class. # If left blank only the name of the header file containing the class # definition is used. Otherwise one should specify the include paths that # are normally passed to the compiler using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter # (but less readable) file names. This can be useful if your file system # doesn't support long names like on DOS, Mac, or CD-ROM. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen # will interpret the first line (until the first dot) of a JavaDoc-style # comment as the brief description. If set to NO, the JavaDoc # comments will behave just like regular Qt-style comments # (thus requiring an explicit @brief command for a brief description.)
JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then Doxygen will # interpret the first line (until the first dot) of a Qt-style # comment as the brief description. If set to NO, the comments # will behave just like regular Qt-style comments (thus requiring # an explicit \brief command for a brief description.) QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen # treat a multi-line C++ special comment block (i.e. a block of //! or /// # comments) as a brief description. This used to be the default behaviour. # The new default is to treat a multi-line C++ comment block as a detailed # description. Set this tag to YES if you prefer the old behaviour instead. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented # member inherits the documentation from any documented member that it # re-implements. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce # a new page for each member. If set to NO, the documentation of a member will # be part of the file/class/namespace that contains it. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. # Doxygen uses this value to replace tabs by spaces in code fragments. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that acts # as commands in the documentation. An alias has the form "name=value". # For example adding "sideeffect=\par Side Effects:\n" will allow you to # put the command \sideeffect (or @sideeffect) in the documentation, which # will result in a user-defined paragraph with heading "Side Effects:". # You can put \n's in the value part of an alias to insert newlines. 
ALIASES = "GF2E=\f$\mathbb{F}_{2^e}\f$" "GF2=\f$\mathbb{F}_2\f$" "GF2X=\f$\mathbb{F}_2[x]\f$" " e=\f$e\f$" "GF4=\f$\mathbb{F}_{2^2}\f$" "GF8=\f$\mathbb{F}_{2^3}\f$" "GF16=\f$\mathbb{F}_{2^4}\f$" "GF32=\f$\mathbb{F}_{2^5}\f$" "GF64=\f$\mathbb{F}_{2^6}\f$" "GF128=\f$\mathbb{F}_{2^7}\f$" "GF256=\f$\mathbb{F}_{2^8}\f$" "GF512=\f$\mathbb{F}_{2^9}\f$" "GF1024=\f$\mathbb{F}_{2^10}\f$" # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C # sources only. Doxygen will then generate output that is more tailored for C. # For instance, some of the names that are used will be different. The list # of all members will be omitted, etc. OPTIMIZE_OUTPUT_FOR_C = YES # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java # sources only. Doxygen will then generate output that is more tailored for # Java. For instance, namespaces will be presented as packages, qualified # scopes will look different, etc. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources only. Doxygen will then generate output that is more tailored for # Fortran. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for # VHDL. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given extension. # Doxygen has a built-in mapping, but you can override or extend it using this # tag. The format is ext=language, where ext is a file extension, and language # is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, # C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make # doxygen treat .inc files as Fortran files (default is PHP), and .f files as C # (default is Fortran), use: inc=Fortran f=C. 
Note that for custom extensions # you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. EXTENSION_MAPPING = # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should # set this tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. # func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. # Doxygen will parse them like normal C++ but will assume all classes use public # instead of private inheritance when no explicit protection keyword is present. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate getter # and setter methods for a property. Setting this option to YES (the default) # will make doxygen to replace the get and set methods by a property in the # documentation. This will only work if the methods are indeed getting or # setting a simple type. If this is not the case, or you want to show the # methods anyway, you should set this option to NO. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES (the default) to allow class member groups of # the same type (for instance a group of public functions) to be put as a # subgroup of that type (e.g. under the Public Functions section). 
Set it to # NO to prevent subgrouping. Alternatively, this can be done per class using # the \nosubgrouping command. SUBGROUPING = YES # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum # is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically # be useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. TYPEDEF_HIDES_STRUCT = NO # The SYMBOL_CACHE_SIZE determines the size of the internal cache used to # determine which symbols to keep in memory and which to flush to disk. # When the cache is full, less often used symbols will be written to disk. # For small to medium size projects (<1000 input files) the default value is # probably good enough. For larger projects a too small cache size can cause # doxygen to be busy swapping symbols to and from disk most of the time # causing a significant performance penalty. # If the system has enough physical memory increasing the cache will improve the # performance by keeping more symbols in memory. Note that the value works on # a logarithmic scale so increasing the size by one will roughly double the # memory usage. The cache size is given by this formula: # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols SYMBOL_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available.
# Private class members and static file members will be hidden unless # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES all private members of a class # will be included in the documentation. EXTRACT_PRIVATE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) # defined locally in source files will be included in the documentation. # If set to NO only classes defined in header files are included. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local # methods, which are defined in the implementation section but not in # the interface are included in the documentation. # If set to NO (the default) only methods in the interface are included. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base # name of the file that contains the anonymous namespace. By default # anonymous namespace are hidden. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all # undocumented members of documented classes, files or namespaces. # If set to NO (the default) these members will be included in the # various overviews, but no documentation section is generated. # This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. # If set to NO (the default) these classes will be included in the various # overviews. This option has no effect if EXTRACT_ALL is enabled. 
HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all # friend (class|struct|union) declarations. # If set to NO (the default) these declarations will be included in the # documentation. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any # documentation blocks found inside the body of a function. # If set to NO (the default) these blocks will be appended to the # function's detailed documentation block. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. If the tag is set # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen # will show members with their full class and namespace scopes in the # documentation. If set to YES the scope will be hidden. HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen # will put a list of the files that are included by a file in the documentation # of that file. SHOW_INCLUDE_FILES = YES # If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen # will list include files with double quotes in the documentation # rather than with sharp brackets. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] # is inserted in the documentation for inline members. 
INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen # will sort the (detailed) documentation of file and class members # alphabetically by member name. If set to NO the members will appear in # declaration order. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the # brief documentation of file, namespace and class members alphabetically # by member name. If set to NO (the default) the members will appear in # declaration order. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen # will sort the (brief and detailed) documentation of class members so that # constructors and destructors are listed first. If set to NO (the default) # the constructors will appear in the respective orders defined by # SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. # This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO # and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the # hierarchy of group names into alphabetical order. If set to NO (the default) # the group names will appear in their defined order. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be # sorted by fully-qualified names, including namespaces. If set to # NO (the default), the class list will be sorted only by class name, # not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the # alphabetical list. SORT_BY_SCOPE_NAME = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. 
This list is created by putting \test # commands in the documentation. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional # documentation sections, marked by \if sectionname ... \endif. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines # the initial value of a variable or define consists of for it to appear in # the documentation. If the initializer consists of more lines than specified # here it will be hidden. Use a value of 0 to hide initializers completely. # The appearance of the initializer of individual variables and defines in the # documentation can be controlled using \showinitializer or \hideinitializer # command in the documentation regardless of this setting. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated # at the bottom of the documentation of classes and structs. If set to YES the # list will mention the files that were used to generate the documentation. SHOW_USED_FILES = YES # If the sources in your project are distributed over multiple directories # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy # in the documentation. The default is NO. SHOW_DIRECTORIES = NO # Set the SHOW_FILES tag to NO to disable the generation of the Files page. # This will remove the Files entry from the Quick Index and from the # Folder Tree View (if specified). The default is YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the # Namespaces page. 
# This will remove the Namespaces entry from the Quick Index # and from the Folder Tree View (if specified). The default is YES. SHOW_NAMESPACES = NO # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command <command> <input-file>, where <command> is the value of # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file # provided by doxygen. Whatever the program writes to standard output # is used as the file version. See the manual for examples. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. # You can optionally specify a file name after the option, if omitted # DoxygenLayout.xml will be used as the name of the layout file. LAYOUT_FILE = #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated # by doxygen. Possible values are YES and NO. If left blank NO is used. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated by doxygen. Possible values are YES and NO. If left blank # NO is used. WARNINGS = YES # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings # for undocumented members. If EXTRACT_ALL is set to YES then this flag will # automatically be disabled.
WARN_IF_UNDOCUMENTED = YES # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some # parameters in a documented function, or documenting parameters that # don't exist or using markup commands wrongly. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for # functions that are documented, but have no documentation for their parameters # or return value. If set to NO (the default) doxygen will only warn about # wrong or incomplete parameter documentation, but not about the absence of # documentation. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that # doxygen can produce. The string should contain the $file, $line, and $text # tags, which will be replaced by the file and line number from which the # warning originated and the warning text. Optionally the format may contain # $version, which will be replaced by the version of the file (if it could # be obtained via FILE_VERSION_FILTER) WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning # and error messages should be written. If left blank the output is written # to stderr. WARN_LOGFILE = #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. INPUT = # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is # also the default input encoding. Doxygen uses libiconv (or the iconv built # into libc) for the transcoding.
See http://www.gnu.org/software/libiconv for # the list of possible encodings. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank the following patterns are tested: # *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx # *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 FILE_PATTERNS = # The RECURSIVE tag can be used to specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. EXCLUDE = config.h m4ri_functions.h # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix filesystem feature) are excluded # from the input. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. Note that the wildcards are matched # against the file with absolute path, so to exclude all test directories # for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring.
Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or # directories that contain example code fragments that are included (see # the \include command). EXAMPLE_PATH = ../tests ../bench # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank all files are included. EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude # commands irrespective of the value of the RECURSIVE tag. # Possible values are YES and NO. If left blank NO is used. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or # directories that contain image that are included in the documentation (see # the \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command <filter> <input-file>, where <filter> # is the value of the INPUT_FILTER tag, and <input-file> is the name of an # input file. Doxygen will then use the output that the filter program writes # to standard output. # If FILTER_PATTERNS is specified, this tag will be # ignored. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. # Doxygen will compare the file name with each pattern and apply the # filter if there is a match. # The filters are a list of the form: # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further # info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER # is applied to all files.
FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will be used to filter the input files when producing source # files to browse (i.e. when SOURCE_BROWSER is set to YES). FILTER_SOURCE_FILES = NO #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will # be generated. Documented entities will be cross-referenced with these sources. # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code # fragments. Normal C and C++ comments will always remain visible. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES # then for each documented function all documented # functions referencing it will be listed. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES # then for each documented function all documented entities # called/used by that function will be listed. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will # link to the source code. # Otherwise they will link to the documentation. REFERENCES_LINK_SOURCE = YES # If the USE_HTAGS tag is set to YES then the references to source code # will point to the HTML generated by the htags(1) tool instead of doxygen # built-in source browser. 
The htags tool is part of GNU's global source # tagging system (see http://www.gnu.org/software/global/global.html). You # will need version 4.8.6 or higher. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index # of all compounds will be generated. Enable this if the project # contains a lot of classes, structs, unions or interfaces. ALPHABETICAL_INDEX = NO # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns # in which this list will be split (can be a number in the range [1..20]) COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. # The IGNORE_PREFIX tag can be used to specify one or more prefixes that # should be ignored while generating the index headers. IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. 
HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). If it is left blank # doxygen will generate files with .html extension. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a # standard header. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. It can be used to # fine-tune the look of the HTML output. If the tag is left blank doxygen # will generate a default style sheet. Note that doxygen will try to copy # the style sheet file to the HTML output directory, so don't put your own # stylesheet in the HTML output directory as well, or it will be erased! HTML_STYLESHEET = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. # Doxygen will adjust the colors in the stylesheet and background images # according to this color. Hue is specified as an angle on a colorwheel, # see http://en.wikipedia.org/wiki/Hue for more information. # For instance the value 0 represents red, 60 is yellow, 120 is green, # 180 is cyan, 240 is blue, 300 purple, and 360 is red again. # The allowed range is 0 to 359. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of # the colors in the HTML output. For a value of 0 the output will use # grayscales only. A value of 255 will produce the most vivid colors. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to # the luminance component of the colors in the HTML output. Values below # 100 gradually make the output lighter, whereas values above 100 make # the output darker. 
The value divided by 100 is the actual gamma applied, # so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, # and 100 does not change the gamma. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting # this to NO can help when comparing the output of multiple runs. HTML_TIMESTAMP = YES # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, # files or namespaces will be aligned in HTML using tables. If set to # NO a bullet list will be used. HTML_ALIGN_MEMBERS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. For this to work a browser that supports # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). HTML_DYNAMIC_SECTIONS = YES # If the GENERATE_DOCSET tag is set to YES, additional index files # will be generated that can be used as input for Apple's Xcode 3 # integrated development environment, introduced with OSX 10.5 (Leopard). # To create a documentation set, doxygen will generate a Makefile in the # HTML output directory. Running make will produce the docset in that # directory and running "make install" will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find # it at startup. # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. GENERATE_DOCSET = NO # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the # feed. A documentation feed provides an umbrella under which multiple # documentation sets from a single provider (such as a company or product suite) # can be grouped. 
DOCSET_FEEDNAME = "Doxygen generated docs" # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that # should uniquely identify the documentation set bundle. This should be a # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen # will append .docset to the name. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES, additional index files # will be generated that can be used as input for tools like the # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) # of the generated HTML documentation. GENERATE_HTMLHELP = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can # be used to specify the file name of the resulting .chm file. You # can add a path in front of the file if the result should not be # written to the html output directory. CHM_FILE = # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can # be used to specify the location (absolute path including file name) of # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run # the HTML help compiler on the generated index.hhp. HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that # it should be included in the master .chm file (NO). GENERATE_CHI = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING # is used to encode HtmlHelp index (hhk), content (hhc) and project file # content.
CHM_INDEX_ENCODING = # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag # controls whether a binary table of contents is generated (YES) or a # normal table of contents (NO) in the .chm file. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members # to the contents of the HTML help documentation and to the tree view. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated # that can be used as input for Qt's qhelpgenerator to generate a # Qt Compressed Help (.qch) of the generated HTML documentation. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can # be used to specify the file name of the resulting .qch file. # The path specified is relative to the HTML output folder. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#namespace QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#virtual-folders QHP_VIRTUAL_FOLDER = doc # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to # add. For more information please see # http://doc.trolltech.com/qthelpproject.html#custom-filters QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see # <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters"> # Qt Help Project / Custom Filters</a>. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's # filter section matches. # <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes"> # Qt Help Project / Filter Attributes</a>.
QHP_SECT_FILTER_ATTRS = # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can # be used to specify the location of Qt's qhelpgenerator. # If non-empty doxygen will try to run qhelpgenerator on the generated # .qhp file. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files # will be generated, which together with the HTML files, form an Eclipse help # plugin. To install this plugin and make it available under the help contents # menu in Eclipse, the contents of the directory containing the HTML and XML # files needs to be copied into the plugins directory of eclipse. The name of # the directory within the plugins directory should be the same as # the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before # the help appears. GENERATE_ECLIPSEHELP = NO # A unique identifier for the eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have # this name. ECLIPSE_DOC_ID = org.doxygen.Project # The DISABLE_INDEX tag can be used to turn on/off the condensed index at # top of each HTML page. The value NO (the default) enables the index and # the value YES disables it. DISABLE_INDEX = NO # This tag can be used to set the number of enum values (range [1..20]) # that doxygen will group on one line in the generated HTML documentation. ENUM_VALUES_PER_LINE = 4 # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. # If the tag value is set to YES, a side panel will be generated # containing a tree-like index structure (just like the one that # is generated for HTML Help). For this to work a browser that supports # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). # Windows users are probably better off using the HTML help feature. 
GENERATE_TREEVIEW = NO # By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, # and Class Hierarchy pages using a tree view instead of an ordered list. USE_INLINE_TREES = NO # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree # is shown. TREEVIEW_WIDTH = 250 # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open # links to external symbols imported via tag files in a separate window. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of Latex formulas included # as images in the HTML documentation. The default is 10. Note that # when you change the font size after a successful doxygen run you need # to manually remove any form_*.png images from the HTML output directory # to force them to be regenerated. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are # not supported properly for IE 6.0, but are supported on all modern browsers. # Note that when changing this option you need to delete any form_*.png files # in the HTML output before the changes have effect. FORMULA_TRANSPARENT = YES # When the SEARCHENGINE tag is enabled doxygen will generate a search box # for the HTML output. The underlying search engine uses javascript # and DHTML and should work on any modern browser. Note that when using # HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets # (GENERATE_DOCSET) there is already a search function so this one should # typically be disabled. For large projects the javascript based search engine # can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. SEARCHENGINE = YES # When the SERVER_BASED_SEARCH tag is enabled the search engine will be # implemented using a PHP enabled web server instead of at the web client # using Javascript. 
Doxygen will generate the search PHP script and index # file to put on the web server. The advantage of the server # based approach is that it scales better to large projects and allows # full text search. The disadvantage is that it is more difficult to set up # and does not have live searching capabilities. SERVER_BASED_SEARCH = NO #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will # generate Latex output. GENERATE_LATEX = YES # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `latex' will be used as the default path. LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. If left blank `latex' will be used as the default command name. # Note that when enabling USE_PDFLATEX this option is only used for # generating bitmaps for formulas in the HTML output, but not in the # Makefile that is written to the output directory. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to # generate index for LaTeX. If left blank `makeindex' will be used as the # default command name. MAKEINDEX_CMD_NAME = makeindex # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact # LaTeX documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_LATEX = YES # The PAPER_TYPE tag can be used to set the paper type that is used # by the printer. Possible values are: a4, a4wide, letter, legal and # executive. If left blank a4wide will be used. PAPER_TYPE = a4wide # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX # packages that should be included in the LaTeX output.
EXTRA_PACKAGES = amsfonts amsmath # The LATEX_HEADER tag can be used to specify a personal LaTeX header for # the generated latex document. The header should contain everything until # the first chapter. If it is left blank doxygen will generate a # standard header. Notice: only use this tag if you know what you are doing! LATEX_HEADER = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated # is prepared for conversion to pdf (using ps2pdf). The pdf file will # contain links (just like the HTML output) instead of page references # This makes the output suitable for online browsing using a pdf viewer. PDF_HYPERLINKS = YES # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of # plain latex in the generated Makefile. Set this option to YES to get a # higher quality PDF documentation. USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. # command to the generated LaTeX files. This will instruct LaTeX to keep # running if errors occur, instead of asking the user for help. # This option is also used when generating formulas in HTML. LATEX_BATCHMODE = YES # If LATEX_HIDE_INDICES is set to YES then doxygen will not # include the index chapters (such as File Index, Compound Index, etc.) # in the output. LATEX_HIDE_INDICES = YES # If LATEX_SOURCE_CODE is set to YES then doxygen will include # source code with syntax highlighting in the LaTeX output. # Note that which sources are shown also depends on other settings # such as SOURCE_BROWSER. LATEX_SOURCE_CODE = NO #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output # The RTF output is optimized for Word 97 and may not look very pretty with # other RTF readers or editors. 
GENERATE_RTF = NO # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `rtf' will be used as the default path. RTF_OUTPUT = rtf # If the COMPACT_RTF tag is set to YES Doxygen generates more compact # RTF documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_RTF = NO # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated # will contain hyperlink fields. The RTF file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using WORD or other # programs which support those fields. # Note: wordpad (write) and others do not support links. RTF_HYPERLINKS = NO # Load stylesheet definitions from file. Syntax is similar to doxygen's # config file, i.e. a series of assignments. You only have to provide # replacements, missing definitions are set to their default value. RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an rtf document. # Syntax is similar to doxygen's config file. RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = NO # The MAN_OUTPUT tag is used to specify where the man pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. 
MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = NO #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- # If the GENERATE_XML tag is set to YES Doxygen will # generate an XML file that captures the structure of # the code including all documentation. GENERATE_XML = NO # The XML_OUTPUT tag is used to specify where the XML pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `xml' will be used as the default path. XML_OUTPUT = xml # The XML_SCHEMA tag can be used to specify an XML schema, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_SCHEMA = # The XML_DTD tag can be used to specify an XML DTD, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_DTD = # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that # enabling this will significantly increase the size of the XML output. 
XML_PROGRAMLISTING = YES #--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will # generate an AutoGen Definitions (see autogen.sf.net) file # that captures the structure of the code including all # documentation. Note that this feature is still experimental # and incomplete at the moment. GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # configuration options related to the Perl module output #--------------------------------------------------------------------------- # If the GENERATE_PERLMOD tag is set to YES Doxygen will # generate a Perl module file that captures the structure of # the code including all documentation. Note that this # feature is still experimental and incomplete at the # moment. GENERATE_PERLMOD = NO # If the PERLMOD_LATEX tag is set to YES Doxygen will generate # the necessary Makefile rules, Perl scripts and LaTeX code to be able # to generate PDF and DVI output from the Perl module output. PERLMOD_LATEX = NO # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be # nicely formatted so it can be parsed by a human reader. # This is useful # if you want to understand what is going on. # On the other hand, if this # tag is set to NO the size of the Perl module output will be much smaller # and Perl will parse it just the same. PERLMOD_PRETTY = YES # The names of the make variables in the generated doxyrules.make file # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. # This is useful so different doxyrules.make files included by the same # Makefile don't overwrite each other's variables. 
PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will # evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro # names in the source code. If set to NO (the default) only conditional # compilation will be performed. Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = NO # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. EXPAND_ONLY_PREDEF = NO # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files # in the INCLUDE_PATH (see below) will be search if a #include is found. SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by # the preprocessor. INCLUDE_PATH = # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the # directories. If left blank, the patterns specified with FILE_PATTERNS will # be used. INCLUDE_FILE_PATTERNS = # The PREDEFINED tag can be used to specify one or more macro names that # are defined before the preprocessor is started (similar to the -D option of # gcc). The argument of the tag is a list of macros of the form: name # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. To prevent a macro definition from being # undefined via #undef or recursively expanded use the := operator # instead of the = operator. 
PREDEFINED = M4RI_DOXYGEN # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition. EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all function-like macros that are alone # on a line, have an all uppercase name, and do not end with a semicolon. Such # function macros are typically used for boiler-plate code, and will confuse # the parser if not removed. SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::additions related to external references #--------------------------------------------------------------------------- # The TAGFILES option can be used to specify one or more tagfiles. # Optionally an initial location of the external documentation # can be added for each tagfile. The format of a tag file without # this location is as follows: # # TAGFILES = file1 file2 ... # Adding location for the tag files is done as follows: # # TAGFILES = file1=loc1 "file2 = loc2" ... # where "loc1" and "loc2" can be relative or absolute paths or # URLs. If a location is present for each tag, the installdox tool # does not have to be run to correct the links. # Note that each tag file must have a unique name # (where the name does NOT include the path) # If a tag file is not located in the directory in which doxygen # is run, you must also specify the path to the tagfile here. TAGFILES = # When a file name is specified after GENERATE_TAGFILE, doxygen will create # a tag file that is based on the input files it reads. GENERATE_TAGFILE = # If the ALLEXTERNALS tag is set to YES all external classes will be listed # in the class index. If set to NO only the inherited external classes # will be listed. 
ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed # in the modules index. If set to NO, only the current project's groups will # be listed. EXTERNAL_GROUPS = YES # The PERL_PATH should be the absolute path and name of the perl script # interpreter (i.e. the result of `which perl'). PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will # generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base # or super classes. Setting the tag to NO turns the diagrams off. Note that # this option is superseded by the HAVE_DOT option below. This is only a # fallback. It is recommended to install and use dot, since it yields more # powerful graphs. CLASS_DIAGRAMS = YES # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the # documentation. The MSCGEN_PATH tag allows you to specify the directory where # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. MSCGEN_PATH = # If set to YES, the inheritance and collaboration graphs will hide # inheritance and usage relations if the target is undocumented # or is not a class. HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz, a graph visualization # toolkit from AT&T and Lucent Bell Labs. The other options in this section # have no effect if this option is set to NO (the default) HAVE_DOT = NO # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is # allowed to run in parallel. 
When set to 0 (the default) doxygen will # base this on the number of processors available in the system. You can set it # explicitly to a value larger than 0 to get control over the balance # between CPU load and processing speed. DOT_NUM_THREADS = 0 # By default doxygen will write a font called FreeSans.ttf to the output # directory and reference it in all dot files that doxygen generates. This # font does not include all possible unicode characters however, so when you need # these (or just want a differently looking font) you can specify the font name # using DOT_FONTNAME. You need to make sure dot is able to find the font, # which can be done by putting it in a standard location or by setting the # DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory # containing the font. DOT_FONTNAME = FreeSans.ttf # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. # The default size is 10pt. DOT_FONTSIZE = 10 # By default doxygen will tell dot to use the output directory to look for the # FreeSans.ttf font (which doxygen will put there itself). If you specify a # different font using DOT_FONTNAME you can set the path where dot # can find it using this tag. DOT_FONTPATH = # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force # the CLASS_DIAGRAMS tag to NO. CLASS_GRAPH = YES # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect implementation dependencies (inheritance, containment, and # class references variables) of the class with other documented classes.
COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen # will generate a graph for groups, showing the direct groups dependencies GROUP_GRAPHS = YES # If the UML_LOOK tag is set to YES doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. UML_LOOK = NO # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances. TEMPLATE_RELATIONS = NO # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT # tags are set to YES then doxygen will generate a graph for each documented # file showing the direct and indirect include dependencies of the file with # other documented files. INCLUDE_GRAPH = YES # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and # HAVE_DOT tags are set to YES then doxygen will generate a graph for each # documented header file showing the documented files that directly or # indirectly include this file. INCLUDED_BY_GRAPH = YES # If the CALL_GRAPH and HAVE_DOT options are set to YES then # doxygen will generate a call dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable call graphs # for selected functions only using the \callgraph command. CALL_GRAPH = NO # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then # doxygen will generate a caller dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable caller # graphs for selected functions only using the \callergraph command. CALLER_GRAPH = NO # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will graphical hierarchy of all classes instead of a textual one. 
GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. The dependency relations are determined by the #include # relations between the files in the directories. DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. Possible values are png, jpg, or gif # If left blank png will be used. DOT_IMAGE_FORMAT = png # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path. DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the # \dotfile command). DOTFILE_DIRS = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of # nodes that will be shown in the graph. If the number of nodes in a graph # becomes larger than this value, doxygen will truncate the graph, which is # visualized by representing a node as a red box. Note that doxygen if the # number of direct children of the root node in a graph is already larger than # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. DOT_GRAPH_MAX_NODES = 50 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the # graphs generated by dot. A depth value of 3 means that only nodes reachable # from the root by following a path via at most 3 edges will be shown. Nodes # that lay further from the root node will be omitted. Note that setting this # option to 1 or 2 may greatly reduce the computation time needed for large # code bases. Also note that the size of a graph can be further restricted by # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. 
MAX_DOT_GRAPH_DEPTH = 0 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent # background. This is disabled by default, because dot on Windows does not # seem to support this out of the box. Warning: Depending on the platform used, # enabling this option may lead to badly anti-aliased labels on the edges of # a graph (i.e. they become hard to read). DOT_TRANSPARENT = YES # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) # support this, this feature is disabled by default. DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will # generate a legend page explaining the meaning of the various boxes and # arrows in the dot generated graphs. GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will # remove the intermediate dot files that are used to generate # the various graphs. DOT_CLEANUP = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax # (see http://www.mathjax.org) which uses client side Javascript for # the rendering instead of using prerendered bitmaps. Use this if you # do not have LaTeX installed or if you want to formulas look prettier # in the HTML output. When enabled you also need to install MathJax # separately and configure the path to it using the MATHJAX_RELPATH # option. USE_MATHJAX = YES libm4rie-20130416/src/config.h.in000066400000000000000000000033061212302364300162730ustar00rootroot00000000000000/* m4rie/config.h.in. Generated from configure.ac by autoheader. */ /* Define to 1 if you have the header file. */ #undef HAVE_DLFCN_H /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H /* Define to 1 if you have the header file. */ #undef HAVE_MEMORY_H /* Define if OpenMP is enabled */ #undef HAVE_OPENMP /* Define to 1 if you have the header file. 
*/ #undef HAVE_STDINT_H /* Define to 1 if you have the <stdlib.h> header file. */ #undef HAVE_STDLIB_H /* Define to 1 if you have the <strings.h> header file. */ #undef HAVE_STRINGS_H /* Define to 1 if you have the <string.h> header file. */ #undef HAVE_STRING_H /* Define to 1 if you have the <sys/stat.h> header file. */ #undef HAVE_SYS_STAT_H /* Define to 1 if you have the <sys/types.h> header file. */ #undef HAVE_SYS_TYPES_H /* Define to 1 if you have the <unistd.h> header file. */ #undef HAVE_UNISTD_H /* Define to the sub-directory in which libtool stores uninstalled libraries. */ #undef LT_OBJDIR /* Define whether debugging is enabled */ #undef NDEBUG /* Name of package */ #undef PACKAGE /* Define to the address where bug reports for this package should be sent. */ #undef PACKAGE_BUGREPORT /* Define to the full name of this package. */ #undef PACKAGE_NAME /* Define to the full name and version of this package. */ #undef PACKAGE_STRING /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME /* Define to the home page for this package. */ #undef PACKAGE_URL /* Define to the version of this package. */ #undef PACKAGE_VERSION /* Define to 1 if you have the ANSI C header files. */ #undef STDC_HEADERS /* Version number of package */ #undef VERSION libm4rie-20130416/src/conversion.c000066400000000000000000000412371212302364300166060ustar00rootroot00000000000000/****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010-2013 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GPL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details.
* * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include "conversion.h" static inline word word_slice_64_02(word a) { a = (a & xcccccccc) | (a & xcccccccc>> 2)<< 1; a = (a & xf0f0f0f0) | (a & xf0f0f0f0>> 4)<< 2; a = (a & xff00ff00) | (a & xff00ff00>> 8)<< 4; a = (a & xffff0000) | (a & xffff0000>>16)<< 8; a = (a & xffffffff) | (a & xffffffff>>32)<<16; return a; } static inline word word_slice_64_04(word a) { a = (a & xf0f0f0f0) | (a & xf0f0f0f0>> 4)<< 3; a = (a & xff00ff00) | (a & xff00ff00>> 8)<< 6; a = (a & xffff0000) | (a & xffff0000>>16)<<12; a = (a & xffffffff) | (a & xffffffff>>32)<<24; return a; } static inline word word_cling_64_02(word a) { a = (a & xffff0000 & x__left32) | (a & (xffff0000>>16) & x__left32)>>16; a = (a & xff00ff00) | (a & xff00ff00>> 8)>> 8; a = (a & xf0f0f0f0) | (a & xf0f0f0f0>> 4)>> 4; a = (a & xcccccccc) | (a & xcccccccc>> 2)>> 2; a = (a & xaaaaaaaa) | (a & xaaaaaaaa>> 1)>> 1; return a; } static inline word word_cling_64_04(word a) { a = (a & xff00ff00 & x__left16) | (a & (xff00ff00>> 8) & x__left16)>>24; a = (a & xf0f0f0f0) | (a & xf0f0f0f0>> 4)>>12; a = (a & xcccccccc) | (a & xcccccccc>> 2)>> 6; a = (a & xaaaaaaaa) | (a & xaaaaaaaa>> 1)>> 3; return a; } mzd_slice_t *mzed_slice(mzd_slice_t *A, const mzed_t *Z) { if (A == NULL) { assert(Z->x->offset == 0); A = mzd_slice_init(Z->finite_field, Z->nrows, Z->ncols); } else { assert((Z->x->offset | A->x[0]->offset) == 0); mzd_slice_set_ui(A, 0); } switch(Z->finite_field->degree) { case 2: return _mzed_slice2(A,Z); case 3: return _mzed_slice4(A,Z); case 4: return _mzed_slice4(A,Z); case 5: return _mzed_slice8(A,Z); case 6: return _mzed_slice8(A,Z); case 7: return _mzed_slice8(A,Z); case 8: return _mzed_slice8(A,Z); case 9: return _mzed_slice16(A,Z); case 10: return _mzed_slice16(A,Z); case 11: return _mzed_slice16(A,Z); case 12: return _mzed_slice16(A,Z); case 13: return 
_mzed_slice16(A,Z); case 14: return _mzed_slice16(A,Z); case 15: return _mzed_slice16(A,Z); case 16: return _mzed_slice16(A,Z); default: m4ri_die("slicing not implemented for this degree"); } return A; } mzed_t *mzed_cling(mzed_t *A, const mzd_slice_t *Z) { if (A == NULL) { assert(Z->x[0]->offset == 0); A = mzed_init(Z->finite_field, Z->nrows, Z->ncols); } else { assert((A->x->offset | Z->x[0]->offset) == 0); mzed_set_ui(A, 0); } switch(Z->finite_field->degree) { case 2: return _mzed_cling2(A,Z); case 3: return _mzed_cling4(A,Z); case 4: return _mzed_cling4(A,Z); case 5: return _mzed_cling8(A,Z); case 6: return _mzed_cling8(A,Z); case 7: return _mzed_cling8(A,Z); case 8: return _mzed_cling8(A,Z); case 9: return _mzed_cling16(A,Z); case 10: return _mzed_cling16(A,Z); case 11: return _mzed_cling16(A,Z); case 12: return _mzed_cling16(A,Z); case 13: return _mzed_cling16(A,Z); case 14: return _mzed_cling16(A,Z); case 15: return _mzed_cling16(A,Z); case 16: return _mzed_cling16(A,Z); default: m4ri_die("clinging not implemented for this degree"); } return A; } mzd_slice_t *_mzed_slice2(mzd_slice_t *T, const mzed_t *F) { assert(T && (T->depth >= 2)); size_t j, j2 = 0; const word bitmask_end = __M4RI_LEFT_BITMASK((T->x[0]->offset + T->ncols) % m4ri_radix); register word r0,r1,r2,r3; if (mzed_is_zero(F)) return T; for(size_t i=0; inrows; i++) { word *t0 = T->x[0]->rows[i]; word *t1 = T->x[1]->rows[i]; const word *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+2 < F->x->width; j+=2,j2++) { r0 = f[j+0], r1 = f[j+1]; r2 = word_slice_64_02(r0<<1 & xaaaaaaaa); r3 = word_slice_64_02(r1<<1 & xaaaaaaaa); t0[j2] = r3 | (r2>>32); r2 = word_slice_64_02(r0<<0 & xaaaaaaaa); r3 = word_slice_64_02(r1<<0 & xaaaaaaaa); t1[j2] = r3 | (r2>>32); } switch(F->x->width - j) { case 2: r0 = f[j+0]; r1 = f[j+1]; r2 = word_slice_64_02(r0<<1 & xaaaaaaaa); r3 = word_slice_64_02(r1<<1 & xaaaaaaaa); t0[j2] &= ~bitmask_end; t0[j2] |= (r3 | (r2>>32)) & bitmask_end; r2 = word_slice_64_02(r0<<0 & 
xaaaaaaaa); r3 = word_slice_64_02(r1<<0 & xaaaaaaaa); t1[j2] &= ~bitmask_end; t1[j2] |= (r3 | (r2>>32)) & bitmask_end; break; case 1: r0 = f[j+0]; r2 = word_slice_64_02(r0<<1 & xaaaaaaaa); t0[j2] &= ~bitmask_end; t0[j2] |= (r2>>32) & bitmask_end; r2 = word_slice_64_02(r0<<0 & xaaaaaaaa); t1[j2] &= ~bitmask_end; t1[j2] |= (r2>>32) & bitmask_end; break; default: m4ri_die("impossible"); } } return T; } mzed_t *_mzed_cling2(mzed_t *T, const mzd_slice_t *F) { size_t j,j2 = 0; register word tmp; const word bitmask_end = __M4RI_LEFT_BITMASK((T->x->offset + T->x->ncols) % m4ri_radix); if (mzd_slice_is_zero(F)) return T; for(size_t i=0; inrows; i++) { const word *f0 = F->x[0]->rows[i]; const word *f1 = F->x[1]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+2 < T->x->width; j+=2, j2++) { t[j+0] = (word_cling_64_02(f0[j2]<<32)>>1) | (word_cling_64_02(f1[j2]<<32)>>0); t[j+1] = (word_cling_64_02(f0[j2]<< 0)>>1) | (word_cling_64_02(f1[j2]<< 0)>>0); } switch(T->x->width - j) { case 2: tmp = (word_cling_64_02(f0[j2]<< 0)>>1) | (word_cling_64_02(f1[j2]<< 0)>>0); t[j+0] = (word_cling_64_02(f0[j2]<<32)>>1) | (word_cling_64_02(f1[j2]<<32)>>0); t[j+1] = (t[j+1] & ~bitmask_end) | (tmp & bitmask_end); break; case 1: tmp = (word_cling_64_02(f0[j2]<<32)>>1) | (word_cling_64_02(f1[j2]<<32)>>0); t[j+0] = (t[j+0] & ~bitmask_end) | (tmp & bitmask_end); break; } } return T; } mzd_slice_t *_mzed_slice4(mzd_slice_t *T, const mzed_t *F) { assert(T && (T->depth == 3 || T->depth == 4) && T->x[0]->offset == 0); size_t j, j2 = 0; register word r0,r1,r2,r3 = 0; const word bitmask_end = __M4RI_LEFT_BITMASK((T->x[0]->offset + T->ncols) % m4ri_radix); if (mzed_is_zero(F)) return T; if (T->depth == 3) { for(size_t i=0; inrows; i++) { word *t0 = T->x[0]->rows[i]; word *t1 = T->x[1]->rows[i]; word *t2 = T->x[2]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+4 < F->x->width; j+=4,j2++) { t0[j2] = word_slice_64_04(f[j+0]<<3 & x88888888)>>48 | word_slice_64_04(f[j+1]<<3 
& x88888888)>>32 \ | word_slice_64_04(f[j+2]<<3 & x88888888)>>16 | word_slice_64_04(f[j+3]<<3 & x88888888)>> 0; t1[j2] = word_slice_64_04(f[j+0]<<2 & x88888888)>>48 | word_slice_64_04(f[j+1]<<2 & x88888888)>>32 \ | word_slice_64_04(f[j+2]<<2 & x88888888)>>16 | word_slice_64_04(f[j+3]<<2 & x88888888)>> 0; t2[j2] = word_slice_64_04(f[j+0]<<1 & x88888888)>>48 | word_slice_64_04(f[j+1]<<1 & x88888888)>>32 \ | word_slice_64_04(f[j+2]<<1 & x88888888)>>16 | word_slice_64_04(f[j+3]<<1 & x88888888)>> 0; } r0 = r1 = r2 = 0; switch(F->x->width - j) { case 4: r0 |= word_slice_64_04(f[j+3]<<3 & x88888888)>> 0; r1 |= word_slice_64_04(f[j+3]<<2 & x88888888)>> 0; r2 |= word_slice_64_04(f[j+3]<<1 & x88888888)>> 0; case 3: r0 |= word_slice_64_04(f[j+2]<<3 & x88888888)>>16; r1 |= word_slice_64_04(f[j+2]<<2 & x88888888)>>16; r2 |= word_slice_64_04(f[j+2]<<1 & x88888888)>>16; case 2: r0 |= word_slice_64_04(f[j+1]<<3 & x88888888)>>32; r1 |= word_slice_64_04(f[j+1]<<2 & x88888888)>>32; r2 |= word_slice_64_04(f[j+1]<<1 & x88888888)>>32; case 1: r0 |= word_slice_64_04(f[j+0]<<3 & x88888888)>>48; r1 |= word_slice_64_04(f[j+0]<<2 & x88888888)>>48; r2 |= word_slice_64_04(f[j+0]<<1 & x88888888)>>48; break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; t1[j2] |= r1 & bitmask_end; t2[j2] |= r2 & bitmask_end; } } else { for(size_t i=0; inrows; i++) { word *t0 = T->x[0]->rows[i]; word *t1 = T->x[1]->rows[i]; word *t2 = T->x[2]->rows[i]; word *t3 = T->x[3]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+4 < F->x->width; j+=4,j2++) { t0[j2] = word_slice_64_04(f[j+0]<<3 & x88888888)>>48 | word_slice_64_04(f[j+1]<<3 & x88888888)>>32 \ | word_slice_64_04(f[j+2]<<3 & x88888888)>>16 | word_slice_64_04(f[j+3]<<3 & x88888888)>> 0; t1[j2] = word_slice_64_04(f[j+0]<<2 & x88888888)>>48 | word_slice_64_04(f[j+1]<<2 & x88888888)>>32 \ | word_slice_64_04(f[j+2]<<2 & x88888888)>>16 | word_slice_64_04(f[j+3]<<2 & x88888888)>> 0; t2[j2] = 
word_slice_64_04(f[j+0]<<1 & x88888888)>>48 | word_slice_64_04(f[j+1]<<1 & x88888888)>>32 \ | word_slice_64_04(f[j+2]<<1 & x88888888)>>16 | word_slice_64_04(f[j+3]<<1 & x88888888)>> 0; t3[j2] = word_slice_64_04(f[j+0]<<0 & x88888888)>>48 | word_slice_64_04(f[j+1]<<0 & x88888888)>>32 \ | word_slice_64_04(f[j+2]<<0 & x88888888)>>16 | word_slice_64_04(f[j+3]<<0 & x88888888)>> 0; } r0 = r1 = r2 = r3 = 0; switch(F->x->width - j) { case 4: r0 |= word_slice_64_04(f[j+3]<<3 & x88888888)>> 0; r1 |= word_slice_64_04(f[j+3]<<2 & x88888888)>> 0; r2 |= word_slice_64_04(f[j+3]<<1 & x88888888)>> 0; r3 |= word_slice_64_04(f[j+3]<<0 & x88888888)>> 0; case 3: r0 |= word_slice_64_04(f[j+2]<<3 & x88888888)>>16; r1 |= word_slice_64_04(f[j+2]<<2 & x88888888)>>16; r2 |= word_slice_64_04(f[j+2]<<1 & x88888888)>>16; r3 |= word_slice_64_04(f[j+2]<<0 & x88888888)>>16; case 2: r0 |= word_slice_64_04(f[j+1]<<3 & x88888888)>>32; r1 |= word_slice_64_04(f[j+1]<<2 & x88888888)>>32; r2 |= word_slice_64_04(f[j+1]<<1 & x88888888)>>32; r3 |= word_slice_64_04(f[j+1]<<0 & x88888888)>>32; case 1: r0 |= word_slice_64_04(f[j+0]<<3 & x88888888)>>48; r1 |= word_slice_64_04(f[j+0]<<2 & x88888888)>>48; r2 |= word_slice_64_04(f[j+0]<<1 & x88888888)>>48; r3 |= word_slice_64_04(f[j+0]<<0 & x88888888)>>48; break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; t1[j2] |= r1 & bitmask_end; t2[j2] |= r2 & bitmask_end; t3[j2] |= r3 & bitmask_end; } } return T; } mzed_t *_mzed_cling4(mzed_t *T, const mzd_slice_t *F) { size_t j,j2 = 0; const word bitmask_end = __M4RI_LEFT_BITMASK((T->x->offset + T->x->ncols) % m4ri_radix); if (mzd_slice_is_zero(F)) return T; if (F->finite_field->degree == 4) { for(rci_t i=0; inrows; i++) { const word *f0 = F->x[0]->rows[i]; const word *f1 = F->x[1]->rows[i]; const word *f2 = F->x[2]->rows[i]; const word *f3 = F->x[3]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+4 < T->x->width; j+=4, j2++) { t[j+0] = (word_cling_64_04(f0[j2]<<48)>>3) | 
(word_cling_64_04(f1[j2]<<48)>>2) | (word_cling_64_04(f2[j2]<<48)>>1) | (word_cling_64_04(f3[j2]<<48)>>0); t[j+1] = (word_cling_64_04(f0[j2]<<32)>>3) | (word_cling_64_04(f1[j2]<<32)>>2) | (word_cling_64_04(f2[j2]<<32)>>1) | (word_cling_64_04(f3[j2]<<32)>>0); t[j+2] = (word_cling_64_04(f0[j2]<<16)>>3) | (word_cling_64_04(f1[j2]<<16)>>2) | (word_cling_64_04(f2[j2]<<16)>>1) | (word_cling_64_04(f3[j2]<<16)>>0); t[j+3] = (word_cling_64_04(f0[j2]<< 0)>>3) | (word_cling_64_04(f1[j2]<< 0)>>2) | (word_cling_64_04(f2[j2]<< 0)>>1) | (word_cling_64_04(f3[j2]<< 0)>>0); } register word tmp=0; switch(T->x->width - j) { case 4: t[j+0] = (word_cling_64_04(f0[j2]<<48)>>3) | (word_cling_64_04(f1[j2]<<48)>>2) | (word_cling_64_04(f2[j2]<<48)>>1) | (word_cling_64_04(f3[j2]<<48)>>0); t[j+1] = (word_cling_64_04(f0[j2]<<32)>>3) | (word_cling_64_04(f1[j2]<<32)>>2) | (word_cling_64_04(f2[j2]<<32)>>1) | (word_cling_64_04(f3[j2]<<32)>>0); t[j+2] = (word_cling_64_04(f0[j2]<<16)>>3) | (word_cling_64_04(f1[j2]<<16)>>2) | (word_cling_64_04(f2[j2]<<16)>>1) | (word_cling_64_04(f3[j2]<<16)>>0); tmp = (word_cling_64_04(f0[j2]<< 0)>>3) | (word_cling_64_04(f1[j2]<< 0)>>2) | (word_cling_64_04(f2[j2]<< 0)>>1) | (word_cling_64_04(f3[j2]<< 0)>>0); t[j+3] = (t[j+3] & ~bitmask_end) | (tmp & bitmask_end); break; case 3: t[j+0] = (word_cling_64_04(f0[j2]<<48)>>3) | (word_cling_64_04(f1[j2]<<48)>>2) | (word_cling_64_04(f2[j2]<<48)>>1) | (word_cling_64_04(f3[j2]<<48)>>0); t[j+1] = (word_cling_64_04(f0[j2]<<32)>>3) | (word_cling_64_04(f1[j2]<<32)>>2) | (word_cling_64_04(f2[j2]<<32)>>1) | (word_cling_64_04(f3[j2]<<32)>>0); tmp = (word_cling_64_04(f0[j2]<<16)>>3) | (word_cling_64_04(f1[j2]<<16)>>2) | (word_cling_64_04(f2[j2]<<16)>>1) | (word_cling_64_04(f3[j2]<<16)>>0); t[j+2] = (t[j+2] & ~bitmask_end) | (tmp & bitmask_end); break; case 2: t[j+0] = (word_cling_64_04(f0[j2]<<48)>>3) | (word_cling_64_04(f1[j2]<<48)>>2) | (word_cling_64_04(f2[j2]<<48)>>1) | (word_cling_64_04(f3[j2]<<48)>>0); tmp = 
(word_cling_64_04(f0[j2]<<32)>>3) | (word_cling_64_04(f1[j2]<<32)>>2) | (word_cling_64_04(f2[j2]<<32)>>1) | (word_cling_64_04(f3[j2]<<32)>>0); t[j+1] = (t[j+1] & ~bitmask_end) | (tmp & bitmask_end); break; case 1: tmp = (word_cling_64_04(f0[j2]<<48)>>3) | (word_cling_64_04(f1[j2]<<48)>>2) | (word_cling_64_04(f2[j2]<<48)>>1) | (word_cling_64_04(f3[j2]<<48)>>0); t[j+0] = (t[j+0] & ~bitmask_end) | (tmp & bitmask_end); break; default: m4ri_die("impossible"); } } } else { //degree == 3 for(rci_t i=0; inrows; i++) { const word *f0 = F->x[0]->rows[i]; const word *f1 = F->x[1]->rows[i]; const word *f2 = F->x[2]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+4 < T->x->width; j+=4, j2++) { t[j+0] = (word_cling_64_04(f0[j2]<<48)>>3) | (word_cling_64_04(f1[j2]<<48)>>2) | (word_cling_64_04(f2[j2]<<48)>>1); t[j+1] = (word_cling_64_04(f0[j2]<<32)>>3) | (word_cling_64_04(f1[j2]<<32)>>2) | (word_cling_64_04(f2[j2]<<32)>>1); t[j+2] = (word_cling_64_04(f0[j2]<<16)>>3) | (word_cling_64_04(f1[j2]<<16)>>2) | (word_cling_64_04(f2[j2]<<16)>>1); t[j+3] = (word_cling_64_04(f0[j2]<< 0)>>3) | (word_cling_64_04(f1[j2]<< 0)>>2) | (word_cling_64_04(f2[j2]<< 0)>>1); } register word tmp=0; switch(T->x->width - j) { case 4: t[j+0] = (word_cling_64_04(f0[j2]<<48)>>3) | (word_cling_64_04(f1[j2]<<48)>>2) | (word_cling_64_04(f2[j2]<<48)>>1); t[j+1] = (word_cling_64_04(f0[j2]<<32)>>3) | (word_cling_64_04(f1[j2]<<32)>>2) | (word_cling_64_04(f2[j2]<<32)>>1); t[j+2] = (word_cling_64_04(f0[j2]<<16)>>3) | (word_cling_64_04(f1[j2]<<16)>>2) | (word_cling_64_04(f2[j2]<<16)>>1); tmp = (word_cling_64_04(f0[j2]<< 0)>>3) | (word_cling_64_04(f1[j2]<< 0)>>2) | (word_cling_64_04(f2[j2]<< 0)>>1); t[j+3] = (t[j+3] & ~bitmask_end) | (tmp & bitmask_end); break; case 3: t[j+0] = (word_cling_64_04(f0[j2]<<48)>>3) | (word_cling_64_04(f1[j2]<<48)>>2) | (word_cling_64_04(f2[j2]<<48)>>1); t[j+1] = (word_cling_64_04(f0[j2]<<32)>>3) | (word_cling_64_04(f1[j2]<<32)>>2) | (word_cling_64_04(f2[j2]<<32)>>1); tmp = 
(word_cling_64_04(f0[j2]<<16)>>3) | (word_cling_64_04(f1[j2]<<16)>>2) | (word_cling_64_04(f2[j2]<<16)>>1); t[j+2] = (t[j+2] & ~bitmask_end) | (tmp & bitmask_end); break; case 2: t[j+0] = (word_cling_64_04(f0[j2]<<48)>>3) | (word_cling_64_04(f1[j2]<<48)>>2) | (word_cling_64_04(f2[j2]<<48)>>1); tmp = (word_cling_64_04(f0[j2]<<32)>>3) | (word_cling_64_04(f1[j2]<<32)>>2) | (word_cling_64_04(f2[j2]<<32)>>1); t[j+1] = (t[j+1] & ~bitmask_end) | (tmp & bitmask_end); break; case 1: tmp = (word_cling_64_04(f0[j2]<<48)>>3) | (word_cling_64_04(f1[j2]<<48)>>2) | (word_cling_64_04(f2[j2]<<48)>>1); t[j+0] = (t[j+0] & ~bitmask_end) | (tmp & bitmask_end); break; default: m4ri_die("impossible"); } } } return T; } libm4rie-20130416/src/conversion.h000066400000000000000000000154511212302364300166120ustar00rootroot00000000000000/** * \file conversion.h * * \brief Conversion between mzed_t and mzd_slice_t * * \author Martin Albrecht */ #ifndef M4RIE_CONVERSION_H #define M4RIE_CONVERSION_H /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2011 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include #include #include /** * \brief Pack a bitslice matrix into a packed represenation. * * \param A Matrix over \GF2E or NULL * \param Z Bitslice matrix over \GF2E * * \ingroup Constructions */ mzed_t *mzed_cling(mzed_t *A, const mzd_slice_t *Z); /** * \brief Unpack the matrix Z into bitslice representation. 
* * \param A Bitslice matrix or NULL * \param Z Input matrix * * \ingroup Constructions */ mzd_slice_t *mzed_slice(mzd_slice_t *A, const mzed_t *Z); /** * \brief Unpack the matrix Z over GF(2^2) into bitslice representation. * * Elements in GF(2^2) can be represented as x*a + y where a is a root * of x^2 + x + 1. A0 contains the coefficients for x while A1 * contains the coefficients for y. * * \param A Zero bitslice matrix over GF(2^2) * \param Z Matrix over GF(2^2) */ mzd_slice_t *_mzed_slice2(mzd_slice_t *A, const mzed_t *Z); /** * \brief Unpack the matrix Z over \GF2E into bitslice representation. * * \param A Zero bitslice matrix over \GF2E * \param Z Matrix over \GF2E */ mzd_slice_t *_mzed_slice4(mzd_slice_t *A, const mzed_t *Z); /** * \brief Unpack the matrix Z over \GF2E into bitslice representation. * * \param A Zero bitslice matrix over \GF2E * \param Z Matrix over \GF2E */ mzd_slice_t *_mzed_slice8(mzd_slice_t *A, const mzed_t *Z); /** * \brief Unpack the matrix Z over \GF2E into bitslice representation. * * \param A Zero bitslice matrix over \GF2E * \param Z Matrix over \GF2E */ mzd_slice_t *_mzed_slice16(mzd_slice_t *A, const mzed_t *Z); /** * \brief Pack a bitslice matrix into a classical representation over GF(2^2). * * Elements in GF(2^2) can be represented as c_1*a + c_0 where a is a * root of x^2 + x + 1. A1 contains the coefficients for c_1 while A0 * contains the coefficients for c_0. * * \param A Matrix over GF(2^2), must be zero * \param Z Bitslice matrix over GF(2^2) */ mzed_t *_mzed_cling2(mzed_t *A, const mzd_slice_t *Z); /** * \brief Pack a bitslice matrix into a classical representation over \GF2E for 2 < e <= 4. * * \param A Matrix over \GF2E, must be zero * \param Z Bitslice matrix over \GF2E */ mzed_t *_mzed_cling4(mzed_t *A, const mzd_slice_t *Z); /** * \brief Pack a bitslice matrix into a classical representation over \GF2E for 4 < e <= 8. 
* * \param A Matrix over \GF2E, must be zero * \param Z Bitslice matrix over \GF2E */ mzed_t *_mzed_cling8(mzed_t *A, const mzd_slice_t *Z); /** * \brief Pack a bitslice matrix into a classical represenation over \GF2E for 8 < e <= 16. * * \param A Matrix over \GF2E, must be zero * \param Z Bitslice matrix over \GF2E */ mzed_t *_mzed_cling16(mzed_t *A, const mzd_slice_t *Z); /** * \brief Compute C += A*B using Karatsuba multiplication of polynomials over GF(2). * * \param C Preallocated return matrix, may be NULL for automatic creation. * \param A Input matrix A. * \param B Input matrix B. * * \sa _mzd_slice_mul_karatsuba */ static inline mzed_t *_mzed_mul_karatsuba(mzed_t *C, const mzed_t *A, const mzed_t *B) { mzd_slice_t *As,*Bs,*Cs; if(C) Cs = mzed_slice(NULL,C); else Cs = NULL; As = mzed_slice(NULL,A); Bs = mzed_slice(NULL,B); Cs = _mzd_slice_mul_karatsuba(Cs, As, Bs); C = mzed_cling(C, Cs); mzd_slice_free(As); mzd_slice_free(Bs); mzd_slice_free(Cs); return C; } /** * \brief Compute C = A*B. * * \param C Preallocated return matrix, may be NULL for automatic creation. * \param A Input matrix A. * \param B Input matrix B. * * \sa _mzd_slice_mul_karatsuba */ static inline mzed_t *mzed_mul_karatsuba(mzed_t *C, const mzed_t *A, const mzed_t *B) { if (A->ncols != B->nrows || A->finite_field != B->finite_field) m4ri_die("mzed_mul_karatsuba: rows, columns and fields must match.\n"); if (C != NULL) { if (C->finite_field != A->finite_field || C->nrows != A->nrows || C->ncols != B->ncols) m4ri_die("mzed_mul_karatsuba: rows and columns of returned matrix must match.\n"); mzed_set_ui(C,0); } return _mzed_mul_karatsuba(C, A, B); } /** * \brief Compute C += A*B. * * \param C Preallocated return matrix. * \param A Input matrix A. * \param B Input matrix B. 
*/ static inline mzed_t *mzed_addmul_karatsuba(mzed_t *C, const mzed_t *A, const mzed_t *B) { assert(C != NULL); if (A->ncols != B->nrows || A->finite_field != B->finite_field) m4ri_die("mzed_addmul_karatsuba: rows, columns and fields must match.\n"); if (C->finite_field != A->finite_field || C->nrows != A->nrows || C->ncols != B->ncols) m4ri_die("mzed_addmul_karatsuba: rows and columns of returned matrix must match.\n"); return _mzed_mul_karatsuba(C, A, B); } /** * \brief Recale the row r in A by X starting c. * * \param A Matrix * \param r Row index. * \param c Column index. * \param X Multiplier * * \ingroup RowOperations */ static inline void mzd_slice_rescale_row(mzd_slice_t *A, rci_t r, rci_t c, word x) { mzd_slice_t *A_w = mzd_slice_init_window(A, r, 0, r+1, A->ncols); mzed_t *A_we = mzed_cling(NULL, A_w); mzed_rescale_row(A_we, r, c, x); mzed_slice(A_w, A_we); mzed_free(A_we); mzd_slice_free_window(A_w); } /* * a bunch of constants to make code more readable */ static const word x80008000 = 0x8000800080008000ULL; static const word x80808080 = 0x8080808080808080ULL; static const word x88888888 = 0x8888888888888888ULL; static const word xaaaaaaaa = 0xaaaaaaaaaaaaaaaaULL; static const word xcccccccc = 0xccccccccccccccccULL; static const word xc0c0c0c0 = 0xc0c0c0c0c0c0c0c0ULL; static const word xf0f0f0f0 = 0xf0f0f0f0f0f0f0f0ULL; static const word xff00ff00 = 0xff00ff00ff00ff00ULL; static const word xffff0000 = 0xffff0000ffff0000ULL; static const word xffffffff = 0xffffffff00000000ULL; static const word x__left04 = 0xf000000000000000ULL; static const word x__left08 = 0xff00000000000000ULL; static const word x__left16 = 0xffff000000000000ULL; static const word x__left32 = 0xffffffff00000000ULL; #endif //M4RIE_CONVERSION_H libm4rie-20130416/src/conversion_cling16.c000066400000000000000000001342251212302364300201310ustar00rootroot00000000000000/****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * 
Copyright (C) 2010-2013 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include "conversion.h" static inline word word_cling_64_16(word a) { a = (a & xcccccccc & x__left04) | (a & xcccccccc>> 2 & x__left04)>>30; a = (a & xaaaaaaaa) | (a & xaaaaaaaa>> 1)>> 15; return a; } mzed_t *_mzed_cling16(mzed_t *T, const mzd_slice_t *F) { wi_t j,j2 = 0; const word bitmask_end = __M4RI_LEFT_BITMASK((T->x->offset + T->x->ncols) % m4ri_radix); if (mzd_slice_is_zero(F)) return T; for(rci_t i=0; inrows; i++) { const word *f00 = F->x[ 0]->rows[i]; const word *f01 = F->x[ 1]->rows[i]; const word *f02 = F->x[ 2]->rows[i]; const word *f03 = F->x[ 3]->rows[i]; const word *f04 = F->x[ 4]->rows[i]; const word *f05 = F->x[ 5]->rows[i]; const word *f06 = F->x[ 6]->rows[i]; const word *f07 = F->x[ 7]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+16 < T->x->width; j+=16, j2++) { t[j+ 0] = (word_cling_64_16(f00[j2]<<60)>>15) | (word_cling_64_16(f01[j2]<<60)>>14) | (word_cling_64_16(f02[j2]<<60)>>13) | (word_cling_64_16(f03[j2]<<60)>>12) \ | (word_cling_64_16(f04[j2]<<60)>>11) | (word_cling_64_16(f05[j2]<<60)>>10) | (word_cling_64_16(f06[j2]<<60)>> 9) | (word_cling_64_16(f07[j2]<<60)>> 8); t[j+ 1] = (word_cling_64_16(f00[j2]<<56)>>15) | (word_cling_64_16(f01[j2]<<56)>>14) | (word_cling_64_16(f02[j2]<<56)>>13) | (word_cling_64_16(f03[j2]<<56)>>12) \ | (word_cling_64_16(f04[j2]<<56)>>11) | (word_cling_64_16(f05[j2]<<56)>>10) | (word_cling_64_16(f06[j2]<<56)>> 9) | (word_cling_64_16(f07[j2]<<56)>> 8); t[j+ 2] = 
(word_cling_64_16(f00[j2]<<52)>>15) | (word_cling_64_16(f01[j2]<<52)>>14) | (word_cling_64_16(f02[j2]<<52)>>13) | (word_cling_64_16(f03[j2]<<52)>>12) \ | (word_cling_64_16(f04[j2]<<52)>>11) | (word_cling_64_16(f05[j2]<<52)>>10) | (word_cling_64_16(f06[j2]<<52)>> 9) | (word_cling_64_16(f07[j2]<<52)>> 8); t[j+ 3] = (word_cling_64_16(f00[j2]<<48)>>15) | (word_cling_64_16(f01[j2]<<48)>>14) | (word_cling_64_16(f02[j2]<<48)>>13) | (word_cling_64_16(f03[j2]<<48)>>12) \ | (word_cling_64_16(f04[j2]<<48)>>11) | (word_cling_64_16(f05[j2]<<48)>>10) | (word_cling_64_16(f06[j2]<<48)>> 9) | (word_cling_64_16(f07[j2]<<48)>> 8); t[j+ 4] = (word_cling_64_16(f00[j2]<<44)>>15) | (word_cling_64_16(f01[j2]<<44)>>14) | (word_cling_64_16(f02[j2]<<44)>>13) | (word_cling_64_16(f03[j2]<<44)>>12) \ | (word_cling_64_16(f04[j2]<<44)>>11) | (word_cling_64_16(f05[j2]<<44)>>10) | (word_cling_64_16(f06[j2]<<44)>> 9) | (word_cling_64_16(f07[j2]<<44)>> 8); t[j+ 5] = (word_cling_64_16(f00[j2]<<40)>>15) | (word_cling_64_16(f01[j2]<<40)>>14) | (word_cling_64_16(f02[j2]<<40)>>13) | (word_cling_64_16(f03[j2]<<40)>>12) \ | (word_cling_64_16(f04[j2]<<40)>>11) | (word_cling_64_16(f05[j2]<<40)>>10) | (word_cling_64_16(f06[j2]<<40)>> 9) | (word_cling_64_16(f07[j2]<<40)>> 8); t[j+ 6] = (word_cling_64_16(f00[j2]<<36)>>15) | (word_cling_64_16(f01[j2]<<36)>>14) | (word_cling_64_16(f02[j2]<<36)>>13) | (word_cling_64_16(f03[j2]<<36)>>12) \ | (word_cling_64_16(f04[j2]<<36)>>11) | (word_cling_64_16(f05[j2]<<36)>>10) | (word_cling_64_16(f06[j2]<<36)>> 9) | (word_cling_64_16(f07[j2]<<36)>> 8); t[j+ 7] = (word_cling_64_16(f00[j2]<<32)>>15) | (word_cling_64_16(f01[j2]<<32)>>14) | (word_cling_64_16(f02[j2]<<32)>>13) | (word_cling_64_16(f03[j2]<<32)>>12) \ | (word_cling_64_16(f04[j2]<<32)>>11) | (word_cling_64_16(f05[j2]<<32)>>10) | (word_cling_64_16(f06[j2]<<32)>> 9) | (word_cling_64_16(f07[j2]<<32)>> 8); t[j+ 8] = (word_cling_64_16(f00[j2]<<28)>>15) | (word_cling_64_16(f01[j2]<<28)>>14) | 
(word_cling_64_16(f02[j2]<<28)>>13) | (word_cling_64_16(f03[j2]<<28)>>12) \ | (word_cling_64_16(f04[j2]<<28)>>11) | (word_cling_64_16(f05[j2]<<28)>>10) | (word_cling_64_16(f06[j2]<<28)>> 9) | (word_cling_64_16(f07[j2]<<28)>> 8); t[j+ 9] = (word_cling_64_16(f00[j2]<<24)>>15) | (word_cling_64_16(f01[j2]<<24)>>14) | (word_cling_64_16(f02[j2]<<24)>>13) | (word_cling_64_16(f03[j2]<<24)>>12) \ | (word_cling_64_16(f04[j2]<<24)>>11) | (word_cling_64_16(f05[j2]<<24)>>10) | (word_cling_64_16(f06[j2]<<24)>> 9) | (word_cling_64_16(f07[j2]<<24)>> 8); t[j+10] = (word_cling_64_16(f00[j2]<<20)>>15) | (word_cling_64_16(f01[j2]<<20)>>14) | (word_cling_64_16(f02[j2]<<20)>>13) | (word_cling_64_16(f03[j2]<<20)>>12) \ | (word_cling_64_16(f04[j2]<<20)>>11) | (word_cling_64_16(f05[j2]<<20)>>10) | (word_cling_64_16(f06[j2]<<20)>> 9) | (word_cling_64_16(f07[j2]<<20)>> 8); t[j+11] = (word_cling_64_16(f00[j2]<<16)>>15) | (word_cling_64_16(f01[j2]<<16)>>14) | (word_cling_64_16(f02[j2]<<16)>>13) | (word_cling_64_16(f03[j2]<<16)>>12) \ | (word_cling_64_16(f04[j2]<<16)>>11) | (word_cling_64_16(f05[j2]<<16)>>10) | (word_cling_64_16(f06[j2]<<16)>> 9) | (word_cling_64_16(f07[j2]<<16)>> 8); t[j+12] = (word_cling_64_16(f00[j2]<<12)>>15) | (word_cling_64_16(f01[j2]<<12)>>14) | (word_cling_64_16(f02[j2]<<12)>>13) | (word_cling_64_16(f03[j2]<<12)>>12) \ | (word_cling_64_16(f04[j2]<<12)>>11) | (word_cling_64_16(f05[j2]<<12)>>10) | (word_cling_64_16(f06[j2]<<12)>> 9) | (word_cling_64_16(f07[j2]<<12)>> 8); t[j+13] = (word_cling_64_16(f00[j2]<< 8)>>15) | (word_cling_64_16(f01[j2]<< 8)>>14) | (word_cling_64_16(f02[j2]<< 8)>>13) | (word_cling_64_16(f03[j2]<< 8)>>12) \ | (word_cling_64_16(f04[j2]<< 8)>>11) | (word_cling_64_16(f05[j2]<< 8)>>10) | (word_cling_64_16(f06[j2]<< 8)>> 9) | (word_cling_64_16(f07[j2]<< 8)>> 8); t[j+14] = (word_cling_64_16(f00[j2]<< 4)>>15) | (word_cling_64_16(f01[j2]<< 4)>>14) | (word_cling_64_16(f02[j2]<< 4)>>13) | (word_cling_64_16(f03[j2]<< 4)>>12) \ | (word_cling_64_16(f04[j2]<< 
4)>>11) | (word_cling_64_16(f05[j2]<< 4)>>10) | (word_cling_64_16(f06[j2]<< 4)>> 9) | (word_cling_64_16(f07[j2]<< 4)>> 8); t[j+15] = (word_cling_64_16(f00[j2]<< 0)>>15) | (word_cling_64_16(f01[j2]<< 0)>>14) | (word_cling_64_16(f02[j2]<< 0)>>13) | (word_cling_64_16(f03[j2]<< 0)>>12) \ | (word_cling_64_16(f04[j2]<< 0)>>11) | (word_cling_64_16(f05[j2]<< 0)>>10) | (word_cling_64_16(f06[j2]<< 0)>> 9) | (word_cling_64_16(f07[j2]<< 0)>> 8); } register word tmp = t[T->x->width-1]; switch(T->x->width - j) { case 16: t[j+15] = (word_cling_64_16(f00[j2]<< 0)>>15) | (word_cling_64_16(f01[j2]<< 0)>>14) | (word_cling_64_16(f02[j2]<< 0)>>13) | (word_cling_64_16(f03[j2]<< 0)>>12) | \ (word_cling_64_16(f04[j2]<< 0)>>11) | (word_cling_64_16(f05[j2]<< 0)>>10) | (word_cling_64_16(f06[j2]<< 0)>> 9) | (word_cling_64_16(f07[j2]<< 0)>> 8); case 15: t[j+14] = (word_cling_64_16(f00[j2]<< 4)>>15) | (word_cling_64_16(f01[j2]<< 4)>>14) | (word_cling_64_16(f02[j2]<< 4)>>13) | (word_cling_64_16(f03[j2]<< 4)>>12) | \ (word_cling_64_16(f04[j2]<< 4)>>11) | (word_cling_64_16(f05[j2]<< 4)>>10) | (word_cling_64_16(f06[j2]<< 4)>> 9) | (word_cling_64_16(f07[j2]<< 4)>> 8); case 14: t[j+13] = (word_cling_64_16(f00[j2]<< 8)>>15) | (word_cling_64_16(f01[j2]<< 8)>>14) | (word_cling_64_16(f02[j2]<< 8)>>13) | (word_cling_64_16(f03[j2]<< 8)>>12) | \ (word_cling_64_16(f04[j2]<< 8)>>11) | (word_cling_64_16(f05[j2]<< 8)>>10) | (word_cling_64_16(f06[j2]<< 8)>> 9) | (word_cling_64_16(f07[j2]<< 8)>> 8); case 13: t[j+12] = (word_cling_64_16(f00[j2]<<12)>>15) | (word_cling_64_16(f01[j2]<<12)>>14) | (word_cling_64_16(f02[j2]<<12)>>13) | (word_cling_64_16(f03[j2]<<12)>>12) | \ (word_cling_64_16(f04[j2]<<12)>>11) | (word_cling_64_16(f05[j2]<<12)>>10) | (word_cling_64_16(f06[j2]<<12)>> 9) | (word_cling_64_16(f07[j2]<<12)>> 8); case 12: t[j+11] = (word_cling_64_16(f00[j2]<<16)>>15) | (word_cling_64_16(f01[j2]<<16)>>14) | (word_cling_64_16(f02[j2]<<16)>>13) | (word_cling_64_16(f03[j2]<<16)>>12) | \ 
(word_cling_64_16(f04[j2]<<16)>>11) | (word_cling_64_16(f05[j2]<<16)>>10) | (word_cling_64_16(f06[j2]<<16)>> 9) | (word_cling_64_16(f07[j2]<<16)>> 8); case 11: t[j+10] = (word_cling_64_16(f00[j2]<<20)>>15) | (word_cling_64_16(f01[j2]<<20)>>14) | (word_cling_64_16(f02[j2]<<20)>>13) | (word_cling_64_16(f03[j2]<<20)>>12) | \ (word_cling_64_16(f04[j2]<<20)>>11) | (word_cling_64_16(f05[j2]<<20)>>10) | (word_cling_64_16(f06[j2]<<20)>> 9) | (word_cling_64_16(f07[j2]<<20)>> 8); case 10: t[j+ 9] = (word_cling_64_16(f00[j2]<<24)>>15) | (word_cling_64_16(f01[j2]<<24)>>14) | (word_cling_64_16(f02[j2]<<24)>>13) | (word_cling_64_16(f03[j2]<<24)>>12) | \ (word_cling_64_16(f04[j2]<<24)>>11) | (word_cling_64_16(f05[j2]<<24)>>10) | (word_cling_64_16(f06[j2]<<24)>> 9) | (word_cling_64_16(f07[j2]<<24)>> 8); case 9: t[j+ 8] = (word_cling_64_16(f00[j2]<<28)>>15) | (word_cling_64_16(f01[j2]<<28)>>14) | (word_cling_64_16(f02[j2]<<28)>>13) | (word_cling_64_16(f03[j2]<<28)>>12) | \ (word_cling_64_16(f04[j2]<<28)>>11) | (word_cling_64_16(f05[j2]<<28)>>10) | (word_cling_64_16(f06[j2]<<28)>> 9) | (word_cling_64_16(f07[j2]<<28)>> 8); case 8: t[j+ 7] = (word_cling_64_16(f00[j2]<<32)>>15) | (word_cling_64_16(f01[j2]<<32)>>14) | (word_cling_64_16(f02[j2]<<32)>>13) | (word_cling_64_16(f03[j2]<<32)>>12) | \ (word_cling_64_16(f04[j2]<<32)>>11) | (word_cling_64_16(f05[j2]<<32)>>10) | (word_cling_64_16(f06[j2]<<32)>> 9) | (word_cling_64_16(f07[j2]<<32)>> 8); case 7: t[j+ 6] = (word_cling_64_16(f00[j2]<<36)>>15) | (word_cling_64_16(f01[j2]<<36)>>14) | (word_cling_64_16(f02[j2]<<36)>>13) | (word_cling_64_16(f03[j2]<<36)>>12) | \ (word_cling_64_16(f04[j2]<<36)>>11) | (word_cling_64_16(f05[j2]<<36)>>10) | (word_cling_64_16(f06[j2]<<36)>> 9) | (word_cling_64_16(f07[j2]<<36)>> 8); case 6: t[j+ 5] = (word_cling_64_16(f00[j2]<<40)>>15) | (word_cling_64_16(f01[j2]<<40)>>14) | (word_cling_64_16(f02[j2]<<40)>>13) | (word_cling_64_16(f03[j2]<<40)>>12) | \ (word_cling_64_16(f04[j2]<<40)>>11) | 
(word_cling_64_16(f05[j2]<<40)>>10) | (word_cling_64_16(f06[j2]<<40)>> 9) | (word_cling_64_16(f07[j2]<<40)>> 8); case 5: t[j+ 4] = (word_cling_64_16(f00[j2]<<44)>>15) | (word_cling_64_16(f01[j2]<<44)>>14) | (word_cling_64_16(f02[j2]<<44)>>13) | (word_cling_64_16(f03[j2]<<44)>>12) | \ (word_cling_64_16(f04[j2]<<44)>>11) | (word_cling_64_16(f05[j2]<<44)>>10) | (word_cling_64_16(f06[j2]<<44)>> 9) | (word_cling_64_16(f07[j2]<<44)>> 8); case 4: t[j+ 3] = (word_cling_64_16(f00[j2]<<48)>>15) | (word_cling_64_16(f01[j2]<<48)>>14) | (word_cling_64_16(f02[j2]<<48)>>13) | (word_cling_64_16(f03[j2]<<48)>>12) | \ (word_cling_64_16(f04[j2]<<48)>>11) | (word_cling_64_16(f05[j2]<<48)>>10) | (word_cling_64_16(f06[j2]<<48)>> 9) | (word_cling_64_16(f07[j2]<<48)>> 8); case 3: t[j+ 2] = (word_cling_64_16(f00[j2]<<52)>>15) | (word_cling_64_16(f01[j2]<<52)>>14) | (word_cling_64_16(f02[j2]<<52)>>13) | (word_cling_64_16(f03[j2]<<52)>>12) | \ (word_cling_64_16(f04[j2]<<52)>>11) | (word_cling_64_16(f05[j2]<<52)>>10) | (word_cling_64_16(f06[j2]<<52)>> 9) | (word_cling_64_16(f07[j2]<<52)>> 8); case 2: t[j+ 1] = (word_cling_64_16(f00[j2]<<56)>>15) | (word_cling_64_16(f01[j2]<<56)>>14) | (word_cling_64_16(f02[j2]<<56)>>13) | (word_cling_64_16(f03[j2]<<56)>>12) | \ (word_cling_64_16(f04[j2]<<56)>>11) | (word_cling_64_16(f05[j2]<<56)>>10) | (word_cling_64_16(f06[j2]<<56)>> 9) | (word_cling_64_16(f07[j2]<<56)>> 8); case 1: t[j+ 0] = (word_cling_64_16(f00[j2]<<60)>>15) | (word_cling_64_16(f01[j2]<<60)>>14) | (word_cling_64_16(f02[j2]<<60)>>13) | (word_cling_64_16(f03[j2]<<60)>>12) | \ (word_cling_64_16(f04[j2]<<60)>>11) | (word_cling_64_16(f05[j2]<<60)>>10) | (word_cling_64_16(f06[j2]<<60)>> 9) | (word_cling_64_16(f07[j2]<<60)>> 8); break; default: m4ri_die("impossible"); } t[T->x->width-1] = (t[T->x->width-1] & bitmask_end) | (tmp & ~bitmask_end); } if(T->finite_field->degree < 12) { switch(T->finite_field->degree) { case 9: { for(rci_t i=0; inrows; i++) { const word *f00 = F->x[ 8]->rows[i]; word 
*t = T->x->rows[i]; for(j=0, j2=0; j+16 < T->x->width; j+=16, j2++) { t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>7); t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>7); t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>7); t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>7); t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>7); t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>7); t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>7); t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>7); t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>7); t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>7); t[j+10] |= (word_cling_64_16(f00[j2]<<20)>>7); t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>7); t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>7); t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>7); t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>7); t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>7); } register word tmp = t[T->x->width-1]; switch(T->x->width - j) { case 16: t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>7); case 15: t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>7); case 14: t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>7); case 13: t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>7); case 12: t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>7); case 11: t[j+10] |= (word_cling_64_16(f00[j2]<<20)>>7); case 10: t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>7); case 9: t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>7); case 8: t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>7); case 7: t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>7); case 6: t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>7); case 5: t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>7); case 4: t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>7); case 3: t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>7); case 2: t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>7); case 1: t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>7); break; default: m4ri_die("impossible"); } t[T->x->width-1] = (t[T->x->width-1] & bitmask_end) | (tmp & ~bitmask_end); } } break; case 10: { for(rci_t i=0; inrows; i++) { const word *f00 = F->x[ 8]->rows[i]; const word *f01 = 
F->x[ 9]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+16 < T->x->width; j+=16, j2++) { t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>7) | (word_cling_64_16(f01[j2]<<60)>>6); t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>7) | (word_cling_64_16(f01[j2]<<56)>>6); t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>7) | (word_cling_64_16(f01[j2]<<52)>>6); t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>7) | (word_cling_64_16(f01[j2]<<48)>>6); t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>7) | (word_cling_64_16(f01[j2]<<44)>>6); t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>7) | (word_cling_64_16(f01[j2]<<40)>>6); t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>7) | (word_cling_64_16(f01[j2]<<36)>>6); t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>7) | (word_cling_64_16(f01[j2]<<32)>>6); t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>7) | (word_cling_64_16(f01[j2]<<28)>>6); t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>7) | (word_cling_64_16(f01[j2]<<24)>>6); t[j+10] |= (word_cling_64_16(f00[j2]<<20)>>7) | (word_cling_64_16(f01[j2]<<20)>>6); t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>7) | (word_cling_64_16(f01[j2]<<16)>>6); t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>7) | (word_cling_64_16(f01[j2]<<12)>>6); t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>7) | (word_cling_64_16(f01[j2]<< 8)>>6); t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>7) | (word_cling_64_16(f01[j2]<< 4)>>6); t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>7) | (word_cling_64_16(f01[j2]<< 0)>>6); } register word tmp = t[T->x->width-1]; switch(T->x->width - j) { case 16: t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>7) | (word_cling_64_16(f01[j2]<< 0)>>6); case 15: t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>7) | (word_cling_64_16(f01[j2]<< 4)>>6); case 14: t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>7) | (word_cling_64_16(f01[j2]<< 8)>>6); case 13: t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>7) | (word_cling_64_16(f01[j2]<<12)>>6); case 12: t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>7) | (word_cling_64_16(f01[j2]<<16)>>6); case 11: t[j+10] |= 
(word_cling_64_16(f00[j2]<<20)>>7) | (word_cling_64_16(f01[j2]<<20)>>6); case 10: t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>7) | (word_cling_64_16(f01[j2]<<24)>>6); case 9: t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>7) | (word_cling_64_16(f01[j2]<<28)>>6); case 8: t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>7) | (word_cling_64_16(f01[j2]<<32)>>6); case 7: t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>7) | (word_cling_64_16(f01[j2]<<36)>>6); case 6: t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>7) | (word_cling_64_16(f01[j2]<<40)>>6); case 5: t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>7) | (word_cling_64_16(f01[j2]<<44)>>6); case 4: t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>7) | (word_cling_64_16(f01[j2]<<48)>>6); case 3: t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>7) | (word_cling_64_16(f01[j2]<<52)>>6); case 2: t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>7) | (word_cling_64_16(f01[j2]<<56)>>6); case 1: t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>7) | (word_cling_64_16(f01[j2]<<60)>>6); break; default: m4ri_die("impossible"); } t[T->x->width-1] = (t[T->x->width-1] & bitmask_end) | (tmp & ~bitmask_end); } } break; case 11: { for(rci_t i=0; inrows; i++) { const word *f00 = F->x[ 8]->rows[i]; const word *f01 = F->x[ 9]->rows[i]; const word *f02 = F->x[10]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+16 < T->x->width; j+=16, j2++) { t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>7) | (word_cling_64_16(f01[j2]<<60)>>6) | (word_cling_64_16(f02[j2]<<60)>>5); t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>7) | (word_cling_64_16(f01[j2]<<56)>>6) | (word_cling_64_16(f02[j2]<<56)>>5); t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>7) | (word_cling_64_16(f01[j2]<<52)>>6) | (word_cling_64_16(f02[j2]<<52)>>5); t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>7) | (word_cling_64_16(f01[j2]<<48)>>6) | (word_cling_64_16(f02[j2]<<48)>>5); t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>7) | (word_cling_64_16(f01[j2]<<44)>>6) | (word_cling_64_16(f02[j2]<<44)>>5); t[j+ 5] |= 
(word_cling_64_16(f00[j2]<<40)>>7) | (word_cling_64_16(f01[j2]<<40)>>6) | (word_cling_64_16(f02[j2]<<40)>>5); t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>7) | (word_cling_64_16(f01[j2]<<36)>>6) | (word_cling_64_16(f02[j2]<<36)>>5); t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>7) | (word_cling_64_16(f01[j2]<<32)>>6) | (word_cling_64_16(f02[j2]<<32)>>5); t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>7) | (word_cling_64_16(f01[j2]<<28)>>6) | (word_cling_64_16(f02[j2]<<28)>>5); t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>7) | (word_cling_64_16(f01[j2]<<24)>>6) | (word_cling_64_16(f02[j2]<<24)>>5); t[j+10] |= (word_cling_64_16(f00[j2]<<20)>>7) | (word_cling_64_16(f01[j2]<<20)>>6) | (word_cling_64_16(f02[j2]<<20)>>5); t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>7) | (word_cling_64_16(f01[j2]<<16)>>6) | (word_cling_64_16(f02[j2]<<16)>>5); t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>7) | (word_cling_64_16(f01[j2]<<12)>>6) | (word_cling_64_16(f02[j2]<<12)>>5); t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>7) | (word_cling_64_16(f01[j2]<< 8)>>6) | (word_cling_64_16(f02[j2]<< 8)>>5); t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>7) | (word_cling_64_16(f01[j2]<< 4)>>6) | (word_cling_64_16(f02[j2]<< 4)>>5); t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>7) | (word_cling_64_16(f01[j2]<< 0)>>6) | (word_cling_64_16(f02[j2]<< 0)>>5); } register word tmp = t[T->x->width-1]; switch(T->x->width - j) { case 16: t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>7) | (word_cling_64_16(f01[j2]<< 0)>>6) | (word_cling_64_16(f02[j2]<< 0)>>5); case 15: t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>7) | (word_cling_64_16(f01[j2]<< 4)>>6) | (word_cling_64_16(f02[j2]<< 4)>>5); case 14: t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>7) | (word_cling_64_16(f01[j2]<< 8)>>6) | (word_cling_64_16(f02[j2]<< 8)>>5); case 13: t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>7) | (word_cling_64_16(f01[j2]<<12)>>6) | (word_cling_64_16(f02[j2]<<12)>>5); case 12: t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>7) | (word_cling_64_16(f01[j2]<<16)>>6) | 
(word_cling_64_16(f02[j2]<<16)>>5); case 11: t[j+10] |= (word_cling_64_16(f00[j2]<<20)>>7) | (word_cling_64_16(f01[j2]<<20)>>6) | (word_cling_64_16(f02[j2]<<20)>>5); case 10: t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>7) | (word_cling_64_16(f01[j2]<<24)>>6) | (word_cling_64_16(f02[j2]<<24)>>5); case 9: t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>7) | (word_cling_64_16(f01[j2]<<28)>>6) | (word_cling_64_16(f02[j2]<<28)>>5); case 8: t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>7) | (word_cling_64_16(f01[j2]<<32)>>6) | (word_cling_64_16(f02[j2]<<32)>>5); case 7: t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>7) | (word_cling_64_16(f01[j2]<<36)>>6) | (word_cling_64_16(f02[j2]<<36)>>5); case 6: t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>7) | (word_cling_64_16(f01[j2]<<40)>>6) | (word_cling_64_16(f02[j2]<<40)>>5); case 5: t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>7) | (word_cling_64_16(f01[j2]<<44)>>6) | (word_cling_64_16(f02[j2]<<44)>>5); case 4: t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>7) | (word_cling_64_16(f01[j2]<<48)>>6) | (word_cling_64_16(f02[j2]<<48)>>5); case 3: t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>7) | (word_cling_64_16(f01[j2]<<52)>>6) | (word_cling_64_16(f02[j2]<<52)>>5); case 2: t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>7) | (word_cling_64_16(f01[j2]<<56)>>6) | (word_cling_64_16(f02[j2]<<56)>>5); case 1: t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>7) | (word_cling_64_16(f01[j2]<<60)>>6) | (word_cling_64_16(f02[j2]<<60)>>5); break; default: m4ri_die("impossible"); } t[T->x->width-1] = (t[T->x->width-1] & bitmask_end) | (tmp & ~bitmask_end); } } break; } } else { for(rci_t i=0; inrows; i++) { const word *f00 = F->x[ 8]->rows[i]; const word *f01 = F->x[ 9]->rows[i]; const word *f02 = F->x[10]->rows[i]; const word *f03 = F->x[11]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+16 < T->x->width; j+=16, j2++) { t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>7) | (word_cling_64_16(f01[j2]<<60)>>6) | (word_cling_64_16(f02[j2]<<60)>>5) | 
(word_cling_64_16(f03[j2]<<60)>>4); t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>7) | (word_cling_64_16(f01[j2]<<56)>>6) | (word_cling_64_16(f02[j2]<<56)>>5) | (word_cling_64_16(f03[j2]<<56)>>4); t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>7) | (word_cling_64_16(f01[j2]<<52)>>6) | (word_cling_64_16(f02[j2]<<52)>>5) | (word_cling_64_16(f03[j2]<<52)>>4); t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>7) | (word_cling_64_16(f01[j2]<<48)>>6) | (word_cling_64_16(f02[j2]<<48)>>5) | (word_cling_64_16(f03[j2]<<48)>>4); t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>7) | (word_cling_64_16(f01[j2]<<44)>>6) | (word_cling_64_16(f02[j2]<<44)>>5) | (word_cling_64_16(f03[j2]<<44)>>4); t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>7) | (word_cling_64_16(f01[j2]<<40)>>6) | (word_cling_64_16(f02[j2]<<40)>>5) | (word_cling_64_16(f03[j2]<<40)>>4); t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>7) | (word_cling_64_16(f01[j2]<<36)>>6) | (word_cling_64_16(f02[j2]<<36)>>5) | (word_cling_64_16(f03[j2]<<36)>>4); t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>7) | (word_cling_64_16(f01[j2]<<32)>>6) | (word_cling_64_16(f02[j2]<<32)>>5) | (word_cling_64_16(f03[j2]<<32)>>4); t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>7) | (word_cling_64_16(f01[j2]<<28)>>6) | (word_cling_64_16(f02[j2]<<28)>>5) | (word_cling_64_16(f03[j2]<<28)>>4); t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>7) | (word_cling_64_16(f01[j2]<<24)>>6) | (word_cling_64_16(f02[j2]<<24)>>5) | (word_cling_64_16(f03[j2]<<24)>>4); t[j+10] |= (word_cling_64_16(f00[j2]<<20)>>7) | (word_cling_64_16(f01[j2]<<20)>>6) | (word_cling_64_16(f02[j2]<<20)>>5) | (word_cling_64_16(f03[j2]<<20)>>4); t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>7) | (word_cling_64_16(f01[j2]<<16)>>6) | (word_cling_64_16(f02[j2]<<16)>>5) | (word_cling_64_16(f03[j2]<<16)>>4); t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>7) | (word_cling_64_16(f01[j2]<<12)>>6) | (word_cling_64_16(f02[j2]<<12)>>5) | (word_cling_64_16(f03[j2]<<12)>>4); t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>7) | 
(word_cling_64_16(f01[j2]<< 8)>>6) | (word_cling_64_16(f02[j2]<< 8)>>5) | (word_cling_64_16(f03[j2]<< 8)>>4); t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>7) | (word_cling_64_16(f01[j2]<< 4)>>6) | (word_cling_64_16(f02[j2]<< 4)>>5) | (word_cling_64_16(f03[j2]<< 4)>>4); t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>7) | (word_cling_64_16(f01[j2]<< 0)>>6) | (word_cling_64_16(f02[j2]<< 0)>>5) | (word_cling_64_16(f03[j2]<< 0)>>4); } register word tmp = t[T->x->width-1]; switch(T->x->width - j) { case 16: t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>7) | (word_cling_64_16(f01[j2]<< 0)>>6) | (word_cling_64_16(f02[j2]<< 0)>>5) | (word_cling_64_16(f03[j2]<< 0)>>4); case 15: t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>7) | (word_cling_64_16(f01[j2]<< 4)>>6) | (word_cling_64_16(f02[j2]<< 4)>>5) | (word_cling_64_16(f03[j2]<< 4)>>4); case 14: t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>7) | (word_cling_64_16(f01[j2]<< 8)>>6) | (word_cling_64_16(f02[j2]<< 8)>>5) | (word_cling_64_16(f03[j2]<< 8)>>4); case 13: t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>7) | (word_cling_64_16(f01[j2]<<12)>>6) | (word_cling_64_16(f02[j2]<<12)>>5) | (word_cling_64_16(f03[j2]<<12)>>4); case 12: t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>7) | (word_cling_64_16(f01[j2]<<16)>>6) | (word_cling_64_16(f02[j2]<<16)>>5) | (word_cling_64_16(f03[j2]<<16)>>4); case 11: t[j+10] |= (word_cling_64_16(f00[j2]<<20)>>7) | (word_cling_64_16(f01[j2]<<20)>>6) | (word_cling_64_16(f02[j2]<<20)>>5) | (word_cling_64_16(f03[j2]<<20)>>4); case 10: t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>7) | (word_cling_64_16(f01[j2]<<24)>>6) | (word_cling_64_16(f02[j2]<<24)>>5) | (word_cling_64_16(f03[j2]<<24)>>4); case 9: t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>7) | (word_cling_64_16(f01[j2]<<28)>>6) | (word_cling_64_16(f02[j2]<<28)>>5) | (word_cling_64_16(f03[j2]<<28)>>4); case 8: t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>7) | (word_cling_64_16(f01[j2]<<32)>>6) | (word_cling_64_16(f02[j2]<<32)>>5) | (word_cling_64_16(f03[j2]<<32)>>4); case 
7: t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>7) | (word_cling_64_16(f01[j2]<<36)>>6) | (word_cling_64_16(f02[j2]<<36)>>5) | (word_cling_64_16(f03[j2]<<36)>>4); case 6: t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>7) | (word_cling_64_16(f01[j2]<<40)>>6) | (word_cling_64_16(f02[j2]<<40)>>5) | (word_cling_64_16(f03[j2]<<40)>>4); case 5: t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>7) | (word_cling_64_16(f01[j2]<<44)>>6) | (word_cling_64_16(f02[j2]<<44)>>5) | (word_cling_64_16(f03[j2]<<44)>>4); case 4: t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>7) | (word_cling_64_16(f01[j2]<<48)>>6) | (word_cling_64_16(f02[j2]<<48)>>5) | (word_cling_64_16(f03[j2]<<48)>>4); case 3: t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>7) | (word_cling_64_16(f01[j2]<<52)>>6) | (word_cling_64_16(f02[j2]<<52)>>5) | (word_cling_64_16(f03[j2]<<52)>>4); case 2: t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>7) | (word_cling_64_16(f01[j2]<<56)>>6) | (word_cling_64_16(f02[j2]<<56)>>5) | (word_cling_64_16(f03[j2]<<56)>>4); case 1: t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>7) | (word_cling_64_16(f01[j2]<<60)>>6) | (word_cling_64_16(f02[j2]<<60)>>5) | (word_cling_64_16(f03[j2]<<60)>>4); break; default: m4ri_die("impossible"); } t[T->x->width-1] = (t[T->x->width-1] & bitmask_end) | (tmp & ~bitmask_end); switch(T->finite_field->degree) { case 13: { for(rci_t i=0; inrows; i++) { const word *f00 = F->x[12]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+16 < T->x->width; j+=16, j2++) { t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>3); t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>3); t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>3); t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>3); t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>3); t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>3); t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>3); t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>3); t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>3); t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>3); t[j+10] |= (word_cling_64_16(f00[j2]<<20)>>3); t[j+11] |= 
(word_cling_64_16(f00[j2]<<16)>>3); t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>3); t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>3); t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>3); t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>3); } register word tmp = t[T->x->width-1]; switch(T->x->width - j) { case 16: t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>3); case 15: t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>3); case 14: t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>3); case 13: t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>3); case 12: t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>3); case 11: t[j+10] |= (word_cling_64_16(f00[j2]<<20)>>3); case 10: t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>3); case 9: t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>3); case 8: t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>3); case 7: t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>3); case 6: t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>3); case 5: t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>3); case 4: t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>3); case 3: t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>3); case 2: t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>3); case 1: t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>3); break; default: m4ri_die("impossible"); } t[T->x->width-1] = (t[T->x->width-1] & bitmask_end) | (tmp & ~bitmask_end); } } break; case 14: { for(rci_t i=0; inrows; i++) { const word *f00 = F->x[12]->rows[i]; const word *f01 = F->x[13]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+16 < T->x->width; j+=16, j2++) { t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>3) | (word_cling_64_16(f01[j2]<<60)>>2); t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>3) | (word_cling_64_16(f01[j2]<<56)>>2); t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>3) | (word_cling_64_16(f01[j2]<<52)>>2); t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>3) | (word_cling_64_16(f01[j2]<<48)>>2); t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>3) | (word_cling_64_16(f01[j2]<<44)>>2); t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>3) | (word_cling_64_16(f01[j2]<<40)>>2); 
t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>3) | (word_cling_64_16(f01[j2]<<36)>>2); t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>3) | (word_cling_64_16(f01[j2]<<32)>>2); t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>3) | (word_cling_64_16(f01[j2]<<28)>>2); t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>3) | (word_cling_64_16(f01[j2]<<24)>>2); t[j+10] |= (word_cling_64_16(f00[j2]<<20)>>3) | (word_cling_64_16(f01[j2]<<20)>>2); t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>3) | (word_cling_64_16(f01[j2]<<16)>>2); t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>3) | (word_cling_64_16(f01[j2]<<12)>>2); t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>3) | (word_cling_64_16(f01[j2]<< 8)>>2); t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>3) | (word_cling_64_16(f01[j2]<< 4)>>2); t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>3) | (word_cling_64_16(f01[j2]<< 0)>>2); } register word tmp = t[T->x->width-1]; switch(T->x->width - j) { case 16: t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>3) | (word_cling_64_16(f01[j2]<< 0)>>2); case 15: t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>3) | (word_cling_64_16(f01[j2]<< 4)>>2); case 14: t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>3) | (word_cling_64_16(f01[j2]<< 8)>>2); case 13: t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>3) | (word_cling_64_16(f01[j2]<<12)>>2); case 12: t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>3) | (word_cling_64_16(f01[j2]<<16)>>2); case 11: t[j+10] |= (word_cling_64_16(f00[j2]<<20)>>3) | (word_cling_64_16(f01[j2]<<20)>>2); case 10: t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>3) | (word_cling_64_16(f01[j2]<<24)>>2); case 9: t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>3) | (word_cling_64_16(f01[j2]<<28)>>2); case 8: t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>3) | (word_cling_64_16(f01[j2]<<32)>>2); case 7: t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>3) | (word_cling_64_16(f01[j2]<<36)>>2); case 6: t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>3) | (word_cling_64_16(f01[j2]<<40)>>2); case 5: t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>3) | 
(word_cling_64_16(f01[j2]<<44)>>2); case 4: t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>3) | (word_cling_64_16(f01[j2]<<48)>>2); case 3: t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>3) | (word_cling_64_16(f01[j2]<<52)>>2); case 2: t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>3) | (word_cling_64_16(f01[j2]<<56)>>2); case 1: t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>3) | (word_cling_64_16(f01[j2]<<60)>>2); break; default: m4ri_die("impossible"); } t[T->x->width-1] = (t[T->x->width-1] & bitmask_end) | (tmp & ~bitmask_end); } } break; case 15: { for(rci_t i=0; inrows; i++) { const word *f00 = F->x[12]->rows[i]; const word *f01 = F->x[13]->rows[i]; const word *f02 = F->x[14]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+16 < T->x->width; j+=16, j2++) { t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>3) | (word_cling_64_16(f01[j2]<<60)>>2) | (word_cling_64_16(f02[j2]<<60)>>1); t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>3) | (word_cling_64_16(f01[j2]<<56)>>2) | (word_cling_64_16(f02[j2]<<56)>>1); t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>3) | (word_cling_64_16(f01[j2]<<52)>>2) | (word_cling_64_16(f02[j2]<<52)>>1); t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>3) | (word_cling_64_16(f01[j2]<<48)>>2) | (word_cling_64_16(f02[j2]<<48)>>1); t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>3) | (word_cling_64_16(f01[j2]<<44)>>2) | (word_cling_64_16(f02[j2]<<44)>>1); t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>3) | (word_cling_64_16(f01[j2]<<40)>>2) | (word_cling_64_16(f02[j2]<<40)>>1); t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>3) | (word_cling_64_16(f01[j2]<<36)>>2) | (word_cling_64_16(f02[j2]<<36)>>1); t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>3) | (word_cling_64_16(f01[j2]<<32)>>2) | (word_cling_64_16(f02[j2]<<32)>>1); t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>3) | (word_cling_64_16(f01[j2]<<28)>>2) | (word_cling_64_16(f02[j2]<<28)>>1); t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>3) | (word_cling_64_16(f01[j2]<<24)>>2) | (word_cling_64_16(f02[j2]<<24)>>1); t[j+10] |= 
(word_cling_64_16(f00[j2]<<20)>>3) | (word_cling_64_16(f01[j2]<<20)>>2) | (word_cling_64_16(f02[j2]<<20)>>1); t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>3) | (word_cling_64_16(f01[j2]<<16)>>2) | (word_cling_64_16(f02[j2]<<16)>>1); t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>3) | (word_cling_64_16(f01[j2]<<12)>>2) | (word_cling_64_16(f02[j2]<<12)>>1); t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>3) | (word_cling_64_16(f01[j2]<< 8)>>2) | (word_cling_64_16(f02[j2]<< 8)>>1); t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>3) | (word_cling_64_16(f01[j2]<< 4)>>2) | (word_cling_64_16(f02[j2]<< 4)>>1); t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>3) | (word_cling_64_16(f01[j2]<< 0)>>2) | (word_cling_64_16(f02[j2]<< 0)>>1); } register word tmp = t[T->x->width-1]; switch(T->x->width - j) { case 16: t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>3) | (word_cling_64_16(f01[j2]<< 0)>>2) | (word_cling_64_16(f02[j2]<< 0)>>1); case 15: t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>3) | (word_cling_64_16(f01[j2]<< 4)>>2) | (word_cling_64_16(f02[j2]<< 4)>>1); case 14: t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>3) | (word_cling_64_16(f01[j2]<< 8)>>2) | (word_cling_64_16(f02[j2]<< 8)>>1); case 13: t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>3) | (word_cling_64_16(f01[j2]<<12)>>2) | (word_cling_64_16(f02[j2]<<12)>>1); case 12: t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>3) | (word_cling_64_16(f01[j2]<<16)>>2) | (word_cling_64_16(f02[j2]<<16)>>1); case 11: t[j+10] |= (word_cling_64_16(f00[j2]<<20)>>3) | (word_cling_64_16(f01[j2]<<20)>>2) | (word_cling_64_16(f02[j2]<<20)>>1); case 10: t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>3) | (word_cling_64_16(f01[j2]<<24)>>2) | (word_cling_64_16(f02[j2]<<24)>>1); case 9: t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>3) | (word_cling_64_16(f01[j2]<<28)>>2) | (word_cling_64_16(f02[j2]<<28)>>1); case 8: t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>3) | (word_cling_64_16(f01[j2]<<32)>>2) | (word_cling_64_16(f02[j2]<<32)>>1); case 7: t[j+ 6] |= 
(word_cling_64_16(f00[j2]<<36)>>3) | (word_cling_64_16(f01[j2]<<36)>>2) | (word_cling_64_16(f02[j2]<<36)>>1); case 6: t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>3) | (word_cling_64_16(f01[j2]<<40)>>2) | (word_cling_64_16(f02[j2]<<40)>>1); case 5: t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>3) | (word_cling_64_16(f01[j2]<<44)>>2) | (word_cling_64_16(f02[j2]<<44)>>1); case 4: t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>3) | (word_cling_64_16(f01[j2]<<48)>>2) | (word_cling_64_16(f02[j2]<<48)>>1); case 3: t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>3) | (word_cling_64_16(f01[j2]<<52)>>2) | (word_cling_64_16(f02[j2]<<52)>>1); case 2: t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>3) | (word_cling_64_16(f01[j2]<<56)>>2) | (word_cling_64_16(f02[j2]<<56)>>1); case 1: t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>3) | (word_cling_64_16(f01[j2]<<60)>>2) | (word_cling_64_16(f02[j2]<<60)>>1); break; default: m4ri_die("impossible"); } t[T->x->width-1] = (t[T->x->width-1] & bitmask_end) | (tmp & ~bitmask_end); } } break; case 16: { for(rci_t i=0; inrows; i++) { const word *f00 = F->x[12]->rows[i]; const word *f01 = F->x[13]->rows[i]; const word *f02 = F->x[14]->rows[i]; const word *f03 = F->x[15]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+16 < T->x->width; j+=16, j2++) { t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>3) | (word_cling_64_16(f01[j2]<<60)>>2) | (word_cling_64_16(f02[j2]<<60)>>1) | (word_cling_64_16(f03[j2]<<60)>>0); t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>3) | (word_cling_64_16(f01[j2]<<56)>>2) | (word_cling_64_16(f02[j2]<<56)>>1) | (word_cling_64_16(f03[j2]<<56)>>0); t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>3) | (word_cling_64_16(f01[j2]<<52)>>2) | (word_cling_64_16(f02[j2]<<52)>>1) | (word_cling_64_16(f03[j2]<<52)>>0); t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>3) | (word_cling_64_16(f01[j2]<<48)>>2) | (word_cling_64_16(f02[j2]<<48)>>1) | (word_cling_64_16(f03[j2]<<48)>>0); t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>3) | (word_cling_64_16(f01[j2]<<44)>>2) | 
(word_cling_64_16(f02[j2]<<44)>>1) | (word_cling_64_16(f03[j2]<<44)>>0); t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>3) | (word_cling_64_16(f01[j2]<<40)>>2) | (word_cling_64_16(f02[j2]<<40)>>1) | (word_cling_64_16(f03[j2]<<40)>>0); t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>3) | (word_cling_64_16(f01[j2]<<36)>>2) | (word_cling_64_16(f02[j2]<<36)>>1) | (word_cling_64_16(f03[j2]<<36)>>0); t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>3) | (word_cling_64_16(f01[j2]<<32)>>2) | (word_cling_64_16(f02[j2]<<32)>>1) | (word_cling_64_16(f03[j2]<<32)>>0); t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>3) | (word_cling_64_16(f01[j2]<<28)>>2) | (word_cling_64_16(f02[j2]<<28)>>1) | (word_cling_64_16(f03[j2]<<28)>>0); t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>3) | (word_cling_64_16(f01[j2]<<24)>>2) | (word_cling_64_16(f02[j2]<<24)>>1) | (word_cling_64_16(f03[j2]<<24)>>0); t[j+10] |= (word_cling_64_16(f00[j2]<<20)>>3) | (word_cling_64_16(f01[j2]<<20)>>2) | (word_cling_64_16(f02[j2]<<20)>>1) | (word_cling_64_16(f03[j2]<<20)>>0); t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>3) | (word_cling_64_16(f01[j2]<<16)>>2) | (word_cling_64_16(f02[j2]<<16)>>1) | (word_cling_64_16(f03[j2]<<16)>>0); t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>3) | (word_cling_64_16(f01[j2]<<12)>>2) | (word_cling_64_16(f02[j2]<<12)>>1) | (word_cling_64_16(f03[j2]<<12)>>0); t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>3) | (word_cling_64_16(f01[j2]<< 8)>>2) | (word_cling_64_16(f02[j2]<< 8)>>1) | (word_cling_64_16(f03[j2]<< 8)>>0); t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>3) | (word_cling_64_16(f01[j2]<< 4)>>2) | (word_cling_64_16(f02[j2]<< 4)>>1) | (word_cling_64_16(f03[j2]<< 4)>>0); t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>3) | (word_cling_64_16(f01[j2]<< 0)>>2) | (word_cling_64_16(f02[j2]<< 0)>>1) | (word_cling_64_16(f03[j2]<< 0)>>0); } register word tmp = t[T->x->width-1]; switch(T->x->width - j) { case 16: t[j+15] |= (word_cling_64_16(f00[j2]<< 0)>>3) | (word_cling_64_16(f01[j2]<< 0)>>2) | (word_cling_64_16(f02[j2]<< 
0)>>1) | (word_cling_64_16(f03[j2]<< 0)>>0); case 15: t[j+14] |= (word_cling_64_16(f00[j2]<< 4)>>3) | (word_cling_64_16(f01[j2]<< 4)>>2) | (word_cling_64_16(f02[j2]<< 4)>>1) | (word_cling_64_16(f03[j2]<< 4)>>0); case 14: t[j+13] |= (word_cling_64_16(f00[j2]<< 8)>>3) | (word_cling_64_16(f01[j2]<< 8)>>2) | (word_cling_64_16(f02[j2]<< 8)>>1) | (word_cling_64_16(f03[j2]<< 8)>>0); case 13: t[j+12] |= (word_cling_64_16(f00[j2]<<12)>>3) | (word_cling_64_16(f01[j2]<<12)>>2) | (word_cling_64_16(f02[j2]<<12)>>1) | (word_cling_64_16(f03[j2]<<12)>>0); case 12: t[j+11] |= (word_cling_64_16(f00[j2]<<16)>>3) | (word_cling_64_16(f01[j2]<<16)>>2) | (word_cling_64_16(f02[j2]<<16)>>1) | (word_cling_64_16(f03[j2]<<16)>>0); case 11: t[j+10] |= (word_cling_64_16(f00[j2]<<20)>>3) | (word_cling_64_16(f01[j2]<<20)>>2) | (word_cling_64_16(f02[j2]<<20)>>1) | (word_cling_64_16(f03[j2]<<20)>>0); case 10: t[j+ 9] |= (word_cling_64_16(f00[j2]<<24)>>3) | (word_cling_64_16(f01[j2]<<24)>>2) | (word_cling_64_16(f02[j2]<<24)>>1) | (word_cling_64_16(f03[j2]<<24)>>0); case 9: t[j+ 8] |= (word_cling_64_16(f00[j2]<<28)>>3) | (word_cling_64_16(f01[j2]<<28)>>2) | (word_cling_64_16(f02[j2]<<28)>>1) | (word_cling_64_16(f03[j2]<<28)>>0); case 8: t[j+ 7] |= (word_cling_64_16(f00[j2]<<32)>>3) | (word_cling_64_16(f01[j2]<<32)>>2) | (word_cling_64_16(f02[j2]<<32)>>1) | (word_cling_64_16(f03[j2]<<32)>>0); case 7: t[j+ 6] |= (word_cling_64_16(f00[j2]<<36)>>3) | (word_cling_64_16(f01[j2]<<36)>>2) | (word_cling_64_16(f02[j2]<<36)>>1) | (word_cling_64_16(f03[j2]<<36)>>0); case 6: t[j+ 5] |= (word_cling_64_16(f00[j2]<<40)>>3) | (word_cling_64_16(f01[j2]<<40)>>2) | (word_cling_64_16(f02[j2]<<40)>>1) | (word_cling_64_16(f03[j2]<<40)>>0); case 5: t[j+ 4] |= (word_cling_64_16(f00[j2]<<44)>>3) | (word_cling_64_16(f01[j2]<<44)>>2) | (word_cling_64_16(f02[j2]<<44)>>1) | (word_cling_64_16(f03[j2]<<44)>>0); case 4: t[j+ 3] |= (word_cling_64_16(f00[j2]<<48)>>3) | (word_cling_64_16(f01[j2]<<48)>>2) | 
(word_cling_64_16(f02[j2]<<48)>>1) | (word_cling_64_16(f03[j2]<<48)>>0); case 3: t[j+ 2] |= (word_cling_64_16(f00[j2]<<52)>>3) | (word_cling_64_16(f01[j2]<<52)>>2) | (word_cling_64_16(f02[j2]<<52)>>1) | (word_cling_64_16(f03[j2]<<52)>>0); case 2: t[j+ 1] |= (word_cling_64_16(f00[j2]<<56)>>3) | (word_cling_64_16(f01[j2]<<56)>>2) | (word_cling_64_16(f02[j2]<<56)>>1) | (word_cling_64_16(f03[j2]<<56)>>0); case 1: t[j+ 0] |= (word_cling_64_16(f00[j2]<<60)>>3) | (word_cling_64_16(f01[j2]<<60)>>2) | (word_cling_64_16(f02[j2]<<60)>>1) | (word_cling_64_16(f03[j2]<<60)>>0); break; default: m4ri_die("impossible"); } t[T->x->width-1] = (t[T->x->width-1] & bitmask_end) | (tmp & ~bitmask_end); } } break; } } } return T; } libm4rie-20130416/src/conversion_cling8.c000066400000000000000000000512531212302364300200510ustar00rootroot00000000000000/****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010-2013 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GPL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include "conversion.h" static inline word word_cling_64_08(word a) { a = (a & xf0f0f0f0 & x__left08) | (a & xf0f0f0f0>> 4 & x__left08)>>28; a = (a & xcccccccc) | (a & xcccccccc>> 2)>>14; a = (a & xaaaaaaaa) | (a & xaaaaaaaa>> 1)>> 7; return a; } mzed_t *_mzed_cling8(mzed_t *T, const mzd_slice_t *F) { size_t j,j2 = 0; const word bitmask_end = __M4RI_LEFT_BITMASK((T->x->offset + T->x->ncols) % m4ri_radix); if (mzd_slice_is_zero(F)) return T; switch (F->finite_field->degree) { case 8: { for(rci_t i=0; inrows; i++) { const word *f0 = F->x[0]->rows[i]; const word *f1 = F->x[1]->rows[i]; const word *f2 = F->x[2]->rows[i]; const word *f3 = F->x[3]->rows[i]; const word *f4 = F->x[4]->rows[i]; const word *f5 = F->x[5]->rows[i]; const word *f6 = F->x[6]->rows[i]; const word *f7 = F->x[7]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+8 < T->x->width; j+=8, j2++) { t[j+0] = (word_cling_64_08(f0[j2]<<56)>>7) | (word_cling_64_08(f1[j2]<<56)>>6) | (word_cling_64_08(f2[j2]<<56)>>5) | (word_cling_64_08(f3[j2]<<56)>>4) \ | (word_cling_64_08(f4[j2]<<56)>>3) | (word_cling_64_08(f5[j2]<<56)>>2) | (word_cling_64_08(f6[j2]<<56)>>1) | (word_cling_64_08(f7[j2]<<56)>>0); t[j+1] = (word_cling_64_08(f0[j2]<<48)>>7) | (word_cling_64_08(f1[j2]<<48)>>6) | (word_cling_64_08(f2[j2]<<48)>>5) | (word_cling_64_08(f3[j2]<<48)>>4) \ | (word_cling_64_08(f4[j2]<<48)>>3) | (word_cling_64_08(f5[j2]<<48)>>2) | (word_cling_64_08(f6[j2]<<48)>>1) | (word_cling_64_08(f7[j2]<<48)>>0); t[j+2] = (word_cling_64_08(f0[j2]<<40)>>7) | (word_cling_64_08(f1[j2]<<40)>>6) | (word_cling_64_08(f2[j2]<<40)>>5) | (word_cling_64_08(f3[j2]<<40)>>4) \ | (word_cling_64_08(f4[j2]<<40)>>3) | (word_cling_64_08(f5[j2]<<40)>>2) | (word_cling_64_08(f6[j2]<<40)>>1) | (word_cling_64_08(f7[j2]<<40)>>0); t[j+3] = (word_cling_64_08(f0[j2]<<32)>>7) | 
(word_cling_64_08(f1[j2]<<32)>>6) | (word_cling_64_08(f2[j2]<<32)>>5) | (word_cling_64_08(f3[j2]<<32)>>4) \ | (word_cling_64_08(f4[j2]<<32)>>3) | (word_cling_64_08(f5[j2]<<32)>>2) | (word_cling_64_08(f6[j2]<<32)>>1) | (word_cling_64_08(f7[j2]<<32)>>0); t[j+4] = (word_cling_64_08(f0[j2]<<24)>>7) | (word_cling_64_08(f1[j2]<<24)>>6) | (word_cling_64_08(f2[j2]<<24)>>5) | (word_cling_64_08(f3[j2]<<24)>>4) \ | (word_cling_64_08(f4[j2]<<24)>>3) | (word_cling_64_08(f5[j2]<<24)>>2) | (word_cling_64_08(f6[j2]<<24)>>1) | (word_cling_64_08(f7[j2]<<24)>>0); t[j+5] = (word_cling_64_08(f0[j2]<<16)>>7) | (word_cling_64_08(f1[j2]<<16)>>6) | (word_cling_64_08(f2[j2]<<16)>>5) | (word_cling_64_08(f3[j2]<<16)>>4) \ | (word_cling_64_08(f4[j2]<<16)>>3) | (word_cling_64_08(f5[j2]<<16)>>2) | (word_cling_64_08(f6[j2]<<16)>>1) | (word_cling_64_08(f7[j2]<<16)>>0); t[j+6] = (word_cling_64_08(f0[j2]<< 8)>>7) | (word_cling_64_08(f1[j2]<< 8)>>6) | (word_cling_64_08(f2[j2]<< 8)>>5) | (word_cling_64_08(f3[j2]<< 8)>>4) \ | (word_cling_64_08(f4[j2]<< 8)>>3) | (word_cling_64_08(f5[j2]<< 8)>>2) | (word_cling_64_08(f6[j2]<< 8)>>1) | (word_cling_64_08(f7[j2]<< 8)>>0); t[j+7] = (word_cling_64_08(f0[j2]<< 0)>>7) | (word_cling_64_08(f1[j2]<< 0)>>6) | (word_cling_64_08(f2[j2]<< 0)>>5) | (word_cling_64_08(f3[j2]<< 0)>>4) \ | (word_cling_64_08(f4[j2]<< 0)>>3) | (word_cling_64_08(f5[j2]<< 0)>>2) | (word_cling_64_08(f6[j2]<< 0)>>1) | (word_cling_64_08(f7[j2]<< 0)>>0); } register word tmp = t[T->x->width-1]; switch(T->x->width - j) { case 8: t[j+7] = (word_cling_64_08(f0[j2]<< 0)>>7) | (word_cling_64_08(f1[j2]<< 0)>>6) | (word_cling_64_08(f2[j2]<< 0)>>5) | (word_cling_64_08(f3[j2]<< 0)>>4) \ | (word_cling_64_08(f4[j2]<< 0)>>3) | (word_cling_64_08(f5[j2]<< 0)>>2) | (word_cling_64_08(f6[j2]<< 0)>>1) | (word_cling_64_08(f7[j2]<< 0)>>0); case 7: t[j+6] = (word_cling_64_08(f0[j2]<< 8)>>7) | (word_cling_64_08(f1[j2]<< 8)>>6) | (word_cling_64_08(f2[j2]<< 8)>>5) | (word_cling_64_08(f3[j2]<< 8)>>4) \ | 
(word_cling_64_08(f4[j2]<< 8)>>3) | (word_cling_64_08(f5[j2]<< 8)>>2) | (word_cling_64_08(f6[j2]<< 8)>>1) | (word_cling_64_08(f7[j2]<< 8)>>0); case 6: t[j+5] = (word_cling_64_08(f0[j2]<<16)>>7) | (word_cling_64_08(f1[j2]<<16)>>6) | (word_cling_64_08(f2[j2]<<16)>>5) | (word_cling_64_08(f3[j2]<<16)>>4) \ | (word_cling_64_08(f4[j2]<<16)>>3) | (word_cling_64_08(f5[j2]<<16)>>2) | (word_cling_64_08(f6[j2]<<16)>>1) | (word_cling_64_08(f7[j2]<<16)>>0); case 5: t[j+4] = (word_cling_64_08(f0[j2]<<24)>>7) | (word_cling_64_08(f1[j2]<<24)>>6) | (word_cling_64_08(f2[j2]<<24)>>5) | (word_cling_64_08(f3[j2]<<24)>>4) \ | (word_cling_64_08(f4[j2]<<24)>>3) | (word_cling_64_08(f5[j2]<<24)>>2) | (word_cling_64_08(f6[j2]<<24)>>1) | (word_cling_64_08(f7[j2]<<24)>>0); case 4: t[j+3] = (word_cling_64_08(f0[j2]<<32)>>7) | (word_cling_64_08(f1[j2]<<32)>>6) | (word_cling_64_08(f2[j2]<<32)>>5) | (word_cling_64_08(f3[j2]<<32)>>4) \ | (word_cling_64_08(f4[j2]<<32)>>3) | (word_cling_64_08(f5[j2]<<32)>>2) | (word_cling_64_08(f6[j2]<<32)>>1) | (word_cling_64_08(f7[j2]<<32)>>0); case 3: t[j+2] = (word_cling_64_08(f0[j2]<<40)>>7) | (word_cling_64_08(f1[j2]<<40)>>6) | (word_cling_64_08(f2[j2]<<40)>>5) | (word_cling_64_08(f3[j2]<<40)>>4) \ | (word_cling_64_08(f4[j2]<<40)>>3) | (word_cling_64_08(f5[j2]<<40)>>2) | (word_cling_64_08(f6[j2]<<40)>>1) | (word_cling_64_08(f7[j2]<<40)>>0); case 2: t[j+1] = (word_cling_64_08(f0[j2]<<48)>>7) | (word_cling_64_08(f1[j2]<<48)>>6) | (word_cling_64_08(f2[j2]<<48)>>5) | (word_cling_64_08(f3[j2]<<48)>>4) \ | (word_cling_64_08(f4[j2]<<48)>>3) | (word_cling_64_08(f5[j2]<<48)>>2) | (word_cling_64_08(f6[j2]<<48)>>1) | (word_cling_64_08(f7[j2]<<48)>>0); case 1: t[j+0] = (word_cling_64_08(f0[j2]<<56)>>7) | (word_cling_64_08(f1[j2]<<56)>>6) | (word_cling_64_08(f2[j2]<<56)>>5) | (word_cling_64_08(f3[j2]<<56)>>4) \ | (word_cling_64_08(f4[j2]<<56)>>3) | (word_cling_64_08(f5[j2]<<56)>>2) | (word_cling_64_08(f6[j2]<<56)>>1) | (word_cling_64_08(f7[j2]<<56)>>0); break; default: 
m4ri_die("impossible"); } t[T->x->width-1] = (t[T->x->width-1] & bitmask_end) | (tmp & ~bitmask_end); } // for loop } break; case 7: { for(rci_t i=0; inrows; i++) { const word *f0 = F->x[0]->rows[i]; const word *f1 = F->x[1]->rows[i]; const word *f2 = F->x[2]->rows[i]; const word *f3 = F->x[3]->rows[i]; const word *f4 = F->x[4]->rows[i]; const word *f5 = F->x[5]->rows[i]; const word *f6 = F->x[6]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+8 < T->x->width; j+=8, j2++) { t[j+0] = (word_cling_64_08(f0[j2]<<56)>>7) | (word_cling_64_08(f1[j2]<<56)>>6) | (word_cling_64_08(f2[j2]<<56)>>5) | (word_cling_64_08(f3[j2]<<56)>>4) \ | (word_cling_64_08(f4[j2]<<56)>>3) | (word_cling_64_08(f5[j2]<<56)>>2) | (word_cling_64_08(f6[j2]<<56)>>1); t[j+1] = (word_cling_64_08(f0[j2]<<48)>>7) | (word_cling_64_08(f1[j2]<<48)>>6) | (word_cling_64_08(f2[j2]<<48)>>5) | (word_cling_64_08(f3[j2]<<48)>>4) \ | (word_cling_64_08(f4[j2]<<48)>>3) | (word_cling_64_08(f5[j2]<<48)>>2) | (word_cling_64_08(f6[j2]<<48)>>1); t[j+2] = (word_cling_64_08(f0[j2]<<40)>>7) | (word_cling_64_08(f1[j2]<<40)>>6) | (word_cling_64_08(f2[j2]<<40)>>5) | (word_cling_64_08(f3[j2]<<40)>>4) \ | (word_cling_64_08(f4[j2]<<40)>>3) | (word_cling_64_08(f5[j2]<<40)>>2) | (word_cling_64_08(f6[j2]<<40)>>1); t[j+3] = (word_cling_64_08(f0[j2]<<32)>>7) | (word_cling_64_08(f1[j2]<<32)>>6) | (word_cling_64_08(f2[j2]<<32)>>5) | (word_cling_64_08(f3[j2]<<32)>>4) \ | (word_cling_64_08(f4[j2]<<32)>>3) | (word_cling_64_08(f5[j2]<<32)>>2) | (word_cling_64_08(f6[j2]<<32)>>1); t[j+4] = (word_cling_64_08(f0[j2]<<24)>>7) | (word_cling_64_08(f1[j2]<<24)>>6) | (word_cling_64_08(f2[j2]<<24)>>5) | (word_cling_64_08(f3[j2]<<24)>>4) \ | (word_cling_64_08(f4[j2]<<24)>>3) | (word_cling_64_08(f5[j2]<<24)>>2) | (word_cling_64_08(f6[j2]<<24)>>1); t[j+5] = (word_cling_64_08(f0[j2]<<16)>>7) | (word_cling_64_08(f1[j2]<<16)>>6) | (word_cling_64_08(f2[j2]<<16)>>5) | (word_cling_64_08(f3[j2]<<16)>>4) \ | (word_cling_64_08(f4[j2]<<16)>>3) | 
(word_cling_64_08(f5[j2]<<16)>>2) | (word_cling_64_08(f6[j2]<<16)>>1); t[j+6] = (word_cling_64_08(f0[j2]<< 8)>>7) | (word_cling_64_08(f1[j2]<< 8)>>6) | (word_cling_64_08(f2[j2]<< 8)>>5) | (word_cling_64_08(f3[j2]<< 8)>>4) \ | (word_cling_64_08(f4[j2]<< 8)>>3) | (word_cling_64_08(f5[j2]<< 8)>>2) | (word_cling_64_08(f6[j2]<< 8)>>1); t[j+7] = (word_cling_64_08(f0[j2]<< 0)>>7) | (word_cling_64_08(f1[j2]<< 0)>>6) | (word_cling_64_08(f2[j2]<< 0)>>5) | (word_cling_64_08(f3[j2]<< 0)>>4) \ | (word_cling_64_08(f4[j2]<< 0)>>3) | (word_cling_64_08(f5[j2]<< 0)>>2) | (word_cling_64_08(f6[j2]<< 0)>>1); } register word tmp= t[T->x->width-1]; switch(T->x->width - j) { case 8: t[j+7] = (word_cling_64_08(f0[j2]<< 0)>>7) | (word_cling_64_08(f1[j2]<< 0)>>6) | (word_cling_64_08(f2[j2]<< 0)>>5) | (word_cling_64_08(f3[j2]<< 0)>>4) \ | (word_cling_64_08(f4[j2]<< 0)>>3) | (word_cling_64_08(f5[j2]<< 0)>>2) | (word_cling_64_08(f6[j2]<< 0)>>1); case 7: t[j+6] = (word_cling_64_08(f0[j2]<< 8)>>7) | (word_cling_64_08(f1[j2]<< 8)>>6) | (word_cling_64_08(f2[j2]<< 8)>>5) | (word_cling_64_08(f3[j2]<< 8)>>4) \ | (word_cling_64_08(f4[j2]<< 8)>>3) | (word_cling_64_08(f5[j2]<< 8)>>2) | (word_cling_64_08(f6[j2]<< 8)>>1); case 6: t[j+5] = (word_cling_64_08(f0[j2]<<16)>>7) | (word_cling_64_08(f1[j2]<<16)>>6) | (word_cling_64_08(f2[j2]<<16)>>5) | (word_cling_64_08(f3[j2]<<16)>>4) \ | (word_cling_64_08(f4[j2]<<16)>>3) | (word_cling_64_08(f5[j2]<<16)>>2) | (word_cling_64_08(f6[j2]<<16)>>1); case 5: t[j+4] = (word_cling_64_08(f0[j2]<<24)>>7) | (word_cling_64_08(f1[j2]<<24)>>6) | (word_cling_64_08(f2[j2]<<24)>>5) | (word_cling_64_08(f3[j2]<<24)>>4) \ | (word_cling_64_08(f4[j2]<<24)>>3) | (word_cling_64_08(f5[j2]<<24)>>2) | (word_cling_64_08(f6[j2]<<24)>>1); case 4: t[j+3] = (word_cling_64_08(f0[j2]<<32)>>7) | (word_cling_64_08(f1[j2]<<32)>>6) | (word_cling_64_08(f2[j2]<<32)>>5) | (word_cling_64_08(f3[j2]<<32)>>4) \ | (word_cling_64_08(f4[j2]<<32)>>3) | (word_cling_64_08(f5[j2]<<32)>>2) | 
(word_cling_64_08(f6[j2]<<32)>>1); case 3: t[j+2] = (word_cling_64_08(f0[j2]<<40)>>7) | (word_cling_64_08(f1[j2]<<40)>>6) | (word_cling_64_08(f2[j2]<<40)>>5) | (word_cling_64_08(f3[j2]<<40)>>4) \ | (word_cling_64_08(f4[j2]<<40)>>3) | (word_cling_64_08(f5[j2]<<40)>>2) | (word_cling_64_08(f6[j2]<<40)>>1); case 2: t[j+1] = (word_cling_64_08(f0[j2]<<48)>>7) | (word_cling_64_08(f1[j2]<<48)>>6) | (word_cling_64_08(f2[j2]<<48)>>5) | (word_cling_64_08(f3[j2]<<48)>>4) \ | (word_cling_64_08(f4[j2]<<48)>>3) | (word_cling_64_08(f5[j2]<<48)>>2) | (word_cling_64_08(f6[j2]<<48)>>1); case 1: t[j+0] = (word_cling_64_08(f0[j2]<<56)>>7) | (word_cling_64_08(f1[j2]<<56)>>6) | (word_cling_64_08(f2[j2]<<56)>>5) | (word_cling_64_08(f3[j2]<<56)>>4) \ | (word_cling_64_08(f4[j2]<<56)>>3) | (word_cling_64_08(f5[j2]<<56)>>2) | (word_cling_64_08(f6[j2]<<56)>>1); break; default: m4ri_die("impossible"); } t[T->x->width-1] = (t[T->x->width-1] & bitmask_end) | (tmp & ~bitmask_end); } } break; case 6: { for(rci_t i=0; inrows; i++) { const word *f0 = F->x[0]->rows[i]; const word *f1 = F->x[1]->rows[i]; const word *f2 = F->x[2]->rows[i]; const word *f3 = F->x[3]->rows[i]; const word *f4 = F->x[4]->rows[i]; const word *f5 = F->x[5]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+8 < T->x->width; j+=8, j2++) { t[j+0] = (word_cling_64_08(f0[j2]<<56)>>7) | (word_cling_64_08(f1[j2]<<56)>>6) | (word_cling_64_08(f2[j2]<<56)>>5) | (word_cling_64_08(f3[j2]<<56)>>4) \ | (word_cling_64_08(f4[j2]<<56)>>3) | (word_cling_64_08(f5[j2]<<56)>>2); t[j+1] = (word_cling_64_08(f0[j2]<<48)>>7) | (word_cling_64_08(f1[j2]<<48)>>6) | (word_cling_64_08(f2[j2]<<48)>>5) | (word_cling_64_08(f3[j2]<<48)>>4) \ | (word_cling_64_08(f4[j2]<<48)>>3) | (word_cling_64_08(f5[j2]<<48)>>2); t[j+2] = (word_cling_64_08(f0[j2]<<40)>>7) | (word_cling_64_08(f1[j2]<<40)>>6) | (word_cling_64_08(f2[j2]<<40)>>5) | (word_cling_64_08(f3[j2]<<40)>>4) \ | (word_cling_64_08(f4[j2]<<40)>>3) | (word_cling_64_08(f5[j2]<<40)>>2); t[j+3] = 
(word_cling_64_08(f0[j2]<<32)>>7) | (word_cling_64_08(f1[j2]<<32)>>6) | (word_cling_64_08(f2[j2]<<32)>>5) | (word_cling_64_08(f3[j2]<<32)>>4) \ | (word_cling_64_08(f4[j2]<<32)>>3) | (word_cling_64_08(f5[j2]<<32)>>2); t[j+4] = (word_cling_64_08(f0[j2]<<24)>>7) | (word_cling_64_08(f1[j2]<<24)>>6) | (word_cling_64_08(f2[j2]<<24)>>5) | (word_cling_64_08(f3[j2]<<24)>>4) \ | (word_cling_64_08(f4[j2]<<24)>>3) | (word_cling_64_08(f5[j2]<<24)>>2); t[j+5] = (word_cling_64_08(f0[j2]<<16)>>7) | (word_cling_64_08(f1[j2]<<16)>>6) | (word_cling_64_08(f2[j2]<<16)>>5) | (word_cling_64_08(f3[j2]<<16)>>4) \ | (word_cling_64_08(f4[j2]<<16)>>3) | (word_cling_64_08(f5[j2]<<16)>>2); t[j+6] = (word_cling_64_08(f0[j2]<< 8)>>7) | (word_cling_64_08(f1[j2]<< 8)>>6) | (word_cling_64_08(f2[j2]<< 8)>>5) | (word_cling_64_08(f3[j2]<< 8)>>4) \ | (word_cling_64_08(f4[j2]<< 8)>>3) | (word_cling_64_08(f5[j2]<< 8)>>2); t[j+7] = (word_cling_64_08(f0[j2]<< 0)>>7) | (word_cling_64_08(f1[j2]<< 0)>>6) | (word_cling_64_08(f2[j2]<< 0)>>5) | (word_cling_64_08(f3[j2]<< 0)>>4) \ | (word_cling_64_08(f4[j2]<< 0)>>3) | (word_cling_64_08(f5[j2]<< 0)>>2); } register word tmp = t[T->x->width-1]; switch(T->x->width - j) { case 8: t[j+7] = (word_cling_64_08(f0[j2]<< 0)>>7) | (word_cling_64_08(f1[j2]<< 0)>>6) | (word_cling_64_08(f2[j2]<< 0)>>5) | (word_cling_64_08(f3[j2]<< 0)>>4) \ | (word_cling_64_08(f4[j2]<< 0)>>3) | (word_cling_64_08(f5[j2]<< 0)>>2); case 7: t[j+6] = (word_cling_64_08(f0[j2]<< 8)>>7) | (word_cling_64_08(f1[j2]<< 8)>>6) | (word_cling_64_08(f2[j2]<< 8)>>5) | (word_cling_64_08(f3[j2]<< 8)>>4) \ | (word_cling_64_08(f4[j2]<< 8)>>3) | (word_cling_64_08(f5[j2]<< 8)>>2); case 6: t[j+5] = (word_cling_64_08(f0[j2]<<16)>>7) | (word_cling_64_08(f1[j2]<<16)>>6) | (word_cling_64_08(f2[j2]<<16)>>5) | (word_cling_64_08(f3[j2]<<16)>>4) \ | (word_cling_64_08(f4[j2]<<16)>>3) | (word_cling_64_08(f5[j2]<<16)>>2); case 5: t[j+4] = (word_cling_64_08(f0[j2]<<24)>>7) | (word_cling_64_08(f1[j2]<<24)>>6) | 
(word_cling_64_08(f2[j2]<<24)>>5) | (word_cling_64_08(f3[j2]<<24)>>4) \ | (word_cling_64_08(f4[j2]<<24)>>3) | (word_cling_64_08(f5[j2]<<24)>>2); case 4: t[j+3] = (word_cling_64_08(f0[j2]<<32)>>7) | (word_cling_64_08(f1[j2]<<32)>>6) | (word_cling_64_08(f2[j2]<<32)>>5) | (word_cling_64_08(f3[j2]<<32)>>4) \ | (word_cling_64_08(f4[j2]<<32)>>3) | (word_cling_64_08(f5[j2]<<32)>>2); case 3: t[j+2] = (word_cling_64_08(f0[j2]<<40)>>7) | (word_cling_64_08(f1[j2]<<40)>>6) | (word_cling_64_08(f2[j2]<<40)>>5) | (word_cling_64_08(f3[j2]<<40)>>4) \ | (word_cling_64_08(f4[j2]<<40)>>3) | (word_cling_64_08(f5[j2]<<40)>>2); case 2: t[j+1] = (word_cling_64_08(f0[j2]<<48)>>7) | (word_cling_64_08(f1[j2]<<48)>>6) | (word_cling_64_08(f2[j2]<<48)>>5) | (word_cling_64_08(f3[j2]<<48)>>4) \ | (word_cling_64_08(f4[j2]<<48)>>3) | (word_cling_64_08(f5[j2]<<48)>>2); case 1: t[j+0] = (word_cling_64_08(f0[j2]<<56)>>7) | (word_cling_64_08(f1[j2]<<56)>>6) | (word_cling_64_08(f2[j2]<<56)>>5) | (word_cling_64_08(f3[j2]<<56)>>4) \ | (word_cling_64_08(f4[j2]<<56)>>3) | (word_cling_64_08(f5[j2]<<56)>>2); break; default: m4ri_die("impossible"); } t[T->x->width-1] = (t[T->x->width-1] & bitmask_end) | (tmp & ~bitmask_end); } } break; case 5: { for(rci_t i=0; inrows; i++) { const word *f0 = F->x[0]->rows[i]; const word *f1 = F->x[1]->rows[i]; const word *f2 = F->x[2]->rows[i]; const word *f3 = F->x[3]->rows[i]; const word *f4 = F->x[4]->rows[i]; word *t = T->x->rows[i]; for(j=0, j2=0; j+8 < T->x->width; j+=8, j2++) { t[j+0] = (word_cling_64_08(f0[j2]<<56)>>7) | (word_cling_64_08(f1[j2]<<56)>>6) | (word_cling_64_08(f2[j2]<<56)>>5) | (word_cling_64_08(f3[j2]<<56)>>4) | (word_cling_64_08(f4[j2]<<56)>>3); t[j+1] = (word_cling_64_08(f0[j2]<<48)>>7) | (word_cling_64_08(f1[j2]<<48)>>6) | (word_cling_64_08(f2[j2]<<48)>>5) | (word_cling_64_08(f3[j2]<<48)>>4) | (word_cling_64_08(f4[j2]<<48)>>3); t[j+2] = (word_cling_64_08(f0[j2]<<40)>>7) | (word_cling_64_08(f1[j2]<<40)>>6) | (word_cling_64_08(f2[j2]<<40)>>5) | 
(word_cling_64_08(f3[j2]<<40)>>4) | (word_cling_64_08(f4[j2]<<40)>>3); t[j+3] = (word_cling_64_08(f0[j2]<<32)>>7) | (word_cling_64_08(f1[j2]<<32)>>6) | (word_cling_64_08(f2[j2]<<32)>>5) | (word_cling_64_08(f3[j2]<<32)>>4) | (word_cling_64_08(f4[j2]<<32)>>3); t[j+4] = (word_cling_64_08(f0[j2]<<24)>>7) | (word_cling_64_08(f1[j2]<<24)>>6) | (word_cling_64_08(f2[j2]<<24)>>5) | (word_cling_64_08(f3[j2]<<24)>>4) | (word_cling_64_08(f4[j2]<<24)>>3); t[j+5] = (word_cling_64_08(f0[j2]<<16)>>7) | (word_cling_64_08(f1[j2]<<16)>>6) | (word_cling_64_08(f2[j2]<<16)>>5) | (word_cling_64_08(f3[j2]<<16)>>4) | (word_cling_64_08(f4[j2]<<16)>>3); t[j+6] = (word_cling_64_08(f0[j2]<< 8)>>7) | (word_cling_64_08(f1[j2]<< 8)>>6) | (word_cling_64_08(f2[j2]<< 8)>>5) | (word_cling_64_08(f3[j2]<< 8)>>4) | (word_cling_64_08(f4[j2]<< 8)>>3); t[j+7] = (word_cling_64_08(f0[j2]<< 0)>>7) | (word_cling_64_08(f1[j2]<< 0)>>6) | (word_cling_64_08(f2[j2]<< 0)>>5) | (word_cling_64_08(f3[j2]<< 0)>>4) | (word_cling_64_08(f4[j2]<< 0)>>3); } register word tmp = t[T->x->width - 1]; switch(T->x->width - j) { case 8: t[j+7] = (word_cling_64_08(f0[j2]<< 0)>>7) | (word_cling_64_08(f1[j2]<< 0)>>6) | (word_cling_64_08(f2[j2]<< 0)>>5) | (word_cling_64_08(f3[j2]<< 0)>>4) | (word_cling_64_08(f4[j2]<< 0)>>3); case 7: t[j+6] = (word_cling_64_08(f0[j2]<< 8)>>7) | (word_cling_64_08(f1[j2]<< 8)>>6) | (word_cling_64_08(f2[j2]<< 8)>>5) | (word_cling_64_08(f3[j2]<< 8)>>4) | (word_cling_64_08(f4[j2]<< 8)>>3); case 6: t[j+5] = (word_cling_64_08(f0[j2]<<16)>>7) | (word_cling_64_08(f1[j2]<<16)>>6) | (word_cling_64_08(f2[j2]<<16)>>5) | (word_cling_64_08(f3[j2]<<16)>>4) | (word_cling_64_08(f4[j2]<<16)>>3); case 5: t[j+4] = (word_cling_64_08(f0[j2]<<24)>>7) | (word_cling_64_08(f1[j2]<<24)>>6) | (word_cling_64_08(f2[j2]<<24)>>5) | (word_cling_64_08(f3[j2]<<24)>>4) | (word_cling_64_08(f4[j2]<<24)>>3); case 4: t[j+3] = (word_cling_64_08(f0[j2]<<32)>>7) | (word_cling_64_08(f1[j2]<<32)>>6) | (word_cling_64_08(f2[j2]<<32)>>5) | 
(word_cling_64_08(f3[j2]<<32)>>4) | (word_cling_64_08(f4[j2]<<32)>>3); case 3: t[j+2] = (word_cling_64_08(f0[j2]<<40)>>7) | (word_cling_64_08(f1[j2]<<40)>>6) | (word_cling_64_08(f2[j2]<<40)>>5) | (word_cling_64_08(f3[j2]<<40)>>4) | (word_cling_64_08(f4[j2]<<40)>>3); case 2: t[j+1] = (word_cling_64_08(f0[j2]<<48)>>7) | (word_cling_64_08(f1[j2]<<48)>>6) | (word_cling_64_08(f2[j2]<<48)>>5) | (word_cling_64_08(f3[j2]<<48)>>4) | (word_cling_64_08(f4[j2]<<48)>>3); case 1: t[j+0] = (word_cling_64_08(f0[j2]<<56)>>7) | (word_cling_64_08(f1[j2]<<56)>>6) | (word_cling_64_08(f2[j2]<<56)>>5) | (word_cling_64_08(f3[j2]<<56)>>4) | (word_cling_64_08(f4[j2]<<56)>>3); break; default: m4ri_die("impossible"); } t[T->x->width-1] = (t[T->x->width-1] & bitmask_end) | (tmp & ~bitmask_end); } } break; default: m4ri_die("impossible"); } return T; } libm4rie-20130416/src/conversion_slice16.c000066400000000000000000000726561212302364300201450ustar00rootroot00000000000000/******************************************************************************
 *
 * M4RIE: Linear Algebra over GF(2^e)
 *
 * Copyright (C) 2010-2013 Martin Albrecht
 *
 * Distributed under the terms of the GNU General Public License (GPL)
 * version 2 or higher.
 *
 * This code is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * The full text of the GPL is available at:
 *
 * http://www.gnu.org/licenses/
 ******************************************************************************/ #include "conversion.h" /*
 * word_slice_64_16: move selected bits of a word towards its most
 * significant end in two mask-and-shift rounds, and return the result.
 *
 * Round 1 keeps the bits of a selected by xffff0000 and ORs in the bits
 * selected by (xffff0000 >> 16), moved left by 15 positions. Round 2 does
 * the same to the intermediate value with xffffffff, a right shift of 32
 * and a left move of 30. Mind the precedence: in "a & mask>>k" the shift
 * applies to the mask, not to a.
 *
 * The mask constants are not defined in this file.
 * NOTE(review): presumably xffff0000 == 0xffff0000ffff0000 and
 * xffffffff == 0xffffffff00000000 (from conversion.h). If so, an argument
 * pre-masked with x80008000 (as the intact call sites below do) comes back
 * with its four surviving bits packed into the top four bit positions,
 * which would match the callers shifting the result right in steps of 4.
 * Confirm against the header.
 */ static inline word word_slice_64_16(word a) { a = (a & xffff0000) | (a & xffff0000>>16)<<15; a = (a & xffffffff) | (a & xffffffff>>32)<<30; return a; } /*
 * we define these things to keep code compact below.
 *
 * word_slice_64_16_combine_bulk(T, Ti, F, Fi, shift)
 *   ORs into T[Ti] a combination of the sixteen consecutive words
 *   F[Fi+0] .. F[Fi+15], each passed through word_slice_64_16(); the
 *   surviving text shows the pieces placed at 60, 56, ..., 4, 0.
 *   NOTE(review): as written here the "shift" parameter is never used and
 *   "<>" is not a C operator, so the text between "<" and ">" appears to
 *   have been lost. By analogy with word_slice_64_16_slice_rest it was
 *   presumably "<<shift & x80008000)>>" -- restore it from a pristine copy
 *   before compiling; do not rely on this guess.
 *
 * word_slice_64_16_slice_rest(F, Fi, shift)
 *   ORs into r0 .. r7 the value
 *   word_slice_64_16(F[Fi]<<k & x80008000) >> shift, with k = 15 for r0
 *   down to k = 8 for r7. r0 .. r7 are not parameters: they must be word
 *   variables in scope wherever the macro is expanded.
 *
 * Both macros substitute their arguments without parentheses, so pass
 * simple expressions only. slice_rest expands to eight separate statements
 * that are not wrapped in do { } while (0); do not use it as the sole body
 * of an unbraced if/else.
 */ #define word_slice_64_16_combine_bulk(T, Ti, F, Fi, shift) \ T[Ti] |= word_slice_64_16(F[Fi+ 0]<>60 | word_slice_64_16(F[Fi+ 1]<>56 \ | word_slice_64_16(F[Fi+ 2]<>52 | word_slice_64_16(F[Fi+ 3]<>48 \ | word_slice_64_16(F[Fi+ 4]<>44 | word_slice_64_16(F[Fi+ 5]<>40 \ | word_slice_64_16(F[Fi+ 6]<>36 | word_slice_64_16(F[Fi+ 7]<>32 \ | word_slice_64_16(F[Fi+ 8]<>28 | word_slice_64_16(F[Fi+ 9]<>24 \ | word_slice_64_16(F[Fi+10]<>20 | word_slice_64_16(F[Fi+11]<>16 \ | word_slice_64_16(F[Fi+12]<>12 | word_slice_64_16(F[Fi+13]<> 8 \ | word_slice_64_16(F[Fi+14]<> 4 | word_slice_64_16(F[Fi+15]<> 0; #define word_slice_64_16_slice_rest(F, Fi, shift) \ r0 |= word_slice_64_16(F[Fi]<<15 & x80008000)>> shift; \ r1 |= word_slice_64_16(F[Fi]<<14 & x80008000)>> shift; \ r2 |= word_slice_64_16(F[Fi]<<13 & x80008000)>> shift; \ r3 |= word_slice_64_16(F[Fi]<<12 & x80008000)>> shift; \ r4 |= word_slice_64_16(F[Fi]<<11 & x80008000)>> shift; \ r5 |= word_slice_64_16(F[Fi]<<10 & x80008000)>> shift; \ r6 |= word_slice_64_16(F[Fi]<< 9 & x80008000)>> shift; \ r7 |= word_slice_64_16(F[Fi]<< 8 & x80008000)>> shift; mzd_slice_t *_mzed_slice16(mzd_slice_t *T, const mzed_t *F) { assert(T && (8 < T->depth && T->depth <= 16) && T->x[0]->offset == 0); size_t j, j2 = 0; register word r0,r1,r2,r3,r4,r5,r6,r7 = 0; const word bitmask_end = __M4RI_LEFT_BITMASK((T->x[0]->offset + T->ncols) % m4ri_radix); if (mzed_is_zero(F)) return T; /* we do multiple runs over T to make the code more compact, we start by doing the first eight bits */ for(size_t i=0; inrows; i++) { word *t0 = T->x[0]->rows[i]; word *t1 = T->x[1]->rows[i]; word *t2 = T->x[2]->rows[i]; word *t3 = T->x[3]->rows[i]; word *t4 = T->x[4]->rows[i]; word *t5 = T->x[5]->rows[i]; word *t6 = T->x[6]->rows[i]; word *t7 = T->x[7]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+16 < F->x->width; j+=16,j2++) { word_slice_64_16_combine_bulk(t0, j2, f, j, 15); word_slice_64_16_combine_bulk(t1, j2, f, j, 14); 
word_slice_64_16_combine_bulk(t2, j2, f, j, 13); word_slice_64_16_combine_bulk(t3, j2, f, j, 12); word_slice_64_16_combine_bulk(t4, j2, f, j, 11); word_slice_64_16_combine_bulk(t5, j2, f, j, 10); word_slice_64_16_combine_bulk(t6, j2, f, j, 9); word_slice_64_16_combine_bulk(t7, j2, f, j, 8); } r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = 0; switch(F->x->width - j) { case 16: word_slice_64_16_slice_rest(f, j+15, 0); case 15: word_slice_64_16_slice_rest(f, j+14, 4); case 14: word_slice_64_16_slice_rest(f, j+13, 8); case 13: word_slice_64_16_slice_rest(f, j+12, 12); case 12: word_slice_64_16_slice_rest(f, j+11, 16); case 11: word_slice_64_16_slice_rest(f, j+10, 20); case 10: word_slice_64_16_slice_rest(f, j+ 9, 24); case 9: word_slice_64_16_slice_rest(f, j+ 8, 28); case 8: word_slice_64_16_slice_rest(f, j+ 7, 32); case 7: word_slice_64_16_slice_rest(f, j+ 6, 36); case 6: word_slice_64_16_slice_rest(f, j+ 5, 40); case 5: word_slice_64_16_slice_rest(f, j+ 4, 44); case 4: word_slice_64_16_slice_rest(f, j+ 3, 48); case 3: word_slice_64_16_slice_rest(f, j+ 2, 52); case 2: word_slice_64_16_slice_rest(f, j+ 1, 56); case 1: word_slice_64_16_slice_rest(f, j+ 0, 60); break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; t1[j2] |= r1 & bitmask_end; t2[j2] |= r2 & bitmask_end; t3[j2] |= r3 & bitmask_end; t4[j2] |= r4 & bitmask_end; t5[j2] |= r5 & bitmask_end; t6[j2] |= r6 & bitmask_end; t7[j2] |= r7 & bitmask_end; } if(T->depth >= 12) { for(size_t i=0; inrows; i++) { word *t0 = T->x[ 8]->rows[i]; word *t1 = T->x[ 9]->rows[i]; word *t2 = T->x[10]->rows[i]; word *t3 = T->x[11]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+16 < F->x->width; j+=16,j2++) { word_slice_64_16_combine_bulk(t0, j2, f, j, 7); word_slice_64_16_combine_bulk(t1, j2, f, j, 6); word_slice_64_16_combine_bulk(t2, j2, f, j, 5); word_slice_64_16_combine_bulk(t3, j2, f, j, 4); } r0 = r1 = r2 = r3 = 0; switch(F->x->width - j) { case 16: r0 |= word_slice_64_16(f[j+15]<< 7 
& x80008000)>> 0; r1 |= word_slice_64_16(f[j+15]<< 6 & x80008000)>> 0; r2 |= word_slice_64_16(f[j+15]<< 5 & x80008000)>> 0; r3 |= word_slice_64_16(f[j+15]<< 4 & x80008000)>> 0; case 15: r0 |= word_slice_64_16(f[j+14]<< 7 & x80008000)>> 4; r1 |= word_slice_64_16(f[j+14]<< 6 & x80008000)>> 4; r2 |= word_slice_64_16(f[j+14]<< 5 & x80008000)>> 4; r3 |= word_slice_64_16(f[j+14]<< 4 & x80008000)>> 4; case 14: r0 |= word_slice_64_16(f[j+13]<< 7 & x80008000)>> 8; r1 |= word_slice_64_16(f[j+13]<< 6 & x80008000)>> 8; r2 |= word_slice_64_16(f[j+13]<< 5 & x80008000)>> 8; r3 |= word_slice_64_16(f[j+13]<< 4 & x80008000)>> 8; case 13: r0 |= word_slice_64_16(f[j+12]<< 7 & x80008000)>> 12; r1 |= word_slice_64_16(f[j+12]<< 6 & x80008000)>> 12; r2 |= word_slice_64_16(f[j+12]<< 5 & x80008000)>> 12; r3 |= word_slice_64_16(f[j+12]<< 4 & x80008000)>> 12; case 12: r0 |= word_slice_64_16(f[j+11]<< 7 & x80008000)>> 16; r1 |= word_slice_64_16(f[j+11]<< 6 & x80008000)>> 16; r2 |= word_slice_64_16(f[j+11]<< 5 & x80008000)>> 16; r3 |= word_slice_64_16(f[j+11]<< 4 & x80008000)>> 16; case 11: r0 |= word_slice_64_16(f[j+10]<< 7 & x80008000)>> 20; r1 |= word_slice_64_16(f[j+10]<< 6 & x80008000)>> 20; r2 |= word_slice_64_16(f[j+10]<< 5 & x80008000)>> 20; r3 |= word_slice_64_16(f[j+10]<< 4 & x80008000)>> 20; case 10: r0 |= word_slice_64_16(f[j+ 9]<< 7 & x80008000)>> 24; r1 |= word_slice_64_16(f[j+ 9]<< 6 & x80008000)>> 24; r2 |= word_slice_64_16(f[j+ 9]<< 5 & x80008000)>> 24; r3 |= word_slice_64_16(f[j+ 9]<< 4 & x80008000)>> 24; case 9: r0 |= word_slice_64_16(f[j+ 8]<< 7 & x80008000)>> 28; r1 |= word_slice_64_16(f[j+ 8]<< 6 & x80008000)>> 28; r2 |= word_slice_64_16(f[j+ 8]<< 5 & x80008000)>> 28; r3 |= word_slice_64_16(f[j+ 8]<< 4 & x80008000)>> 28; case 8: r0 |= word_slice_64_16(f[j+ 7]<< 7 & x80008000)>> 32; r1 |= word_slice_64_16(f[j+ 7]<< 6 & x80008000)>> 32; r2 |= word_slice_64_16(f[j+ 7]<< 5 & x80008000)>> 32; r3 |= word_slice_64_16(f[j+ 7]<< 4 & x80008000)>> 32; case 7: r0 |= 
word_slice_64_16(f[j+ 6]<< 7 & x80008000)>> 36; r1 |= word_slice_64_16(f[j+ 6]<< 6 & x80008000)>> 36; r2 |= word_slice_64_16(f[j+ 6]<< 5 & x80008000)>> 36; r3 |= word_slice_64_16(f[j+ 6]<< 4 & x80008000)>> 36; case 6: r0 |= word_slice_64_16(f[j+ 5]<< 7 & x80008000)>> 40; r1 |= word_slice_64_16(f[j+ 5]<< 6 & x80008000)>> 40; r2 |= word_slice_64_16(f[j+ 5]<< 5 & x80008000)>> 40; r3 |= word_slice_64_16(f[j+ 5]<< 4 & x80008000)>> 40; case 5: r0 |= word_slice_64_16(f[j+ 4]<< 7 & x80008000)>> 44; r1 |= word_slice_64_16(f[j+ 4]<< 6 & x80008000)>> 44; r2 |= word_slice_64_16(f[j+ 4]<< 5 & x80008000)>> 44; r3 |= word_slice_64_16(f[j+ 4]<< 4 & x80008000)>> 44; case 4: r0 |= word_slice_64_16(f[j+ 3]<< 7 & x80008000)>> 48; r1 |= word_slice_64_16(f[j+ 3]<< 6 & x80008000)>> 48; r2 |= word_slice_64_16(f[j+ 3]<< 5 & x80008000)>> 48; r3 |= word_slice_64_16(f[j+ 3]<< 4 & x80008000)>> 48; case 3: r0 |= word_slice_64_16(f[j+ 2]<< 7 & x80008000)>> 52; r1 |= word_slice_64_16(f[j+ 2]<< 6 & x80008000)>> 52; r2 |= word_slice_64_16(f[j+ 2]<< 5 & x80008000)>> 52; r3 |= word_slice_64_16(f[j+ 2]<< 4 & x80008000)>> 52; case 2: r0 |= word_slice_64_16(f[j+ 1]<< 7 & x80008000)>> 56; r1 |= word_slice_64_16(f[j+ 1]<< 6 & x80008000)>> 56; r2 |= word_slice_64_16(f[j+ 1]<< 5 & x80008000)>> 56; r3 |= word_slice_64_16(f[j+ 1]<< 4 & x80008000)>> 56; case 1: r0 |= word_slice_64_16(f[j+ 0]<< 7 & x80008000)>> 60; r1 |= word_slice_64_16(f[j+ 0]<< 6 & x80008000)>> 60; r2 |= word_slice_64_16(f[j+ 0]<< 5 & x80008000)>> 60; r3 |= word_slice_64_16(f[j+ 0]<< 4 & x80008000)>> 60; break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; t1[j2] |= r1 & bitmask_end; t2[j2] |= r2 & bitmask_end; t3[j2] |= r3 & bitmask_end; } switch(T->depth) { case 16: { for(size_t i=0; inrows; i++) { word *t0 = T->x[12]->rows[i]; word *t1 = T->x[13]->rows[i]; word *t2 = T->x[14]->rows[i]; word *t3 = T->x[15]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+16 < F->x->width; j+=16,j2++) { 
word_slice_64_16_combine_bulk(t0, j2, f, j, 3); word_slice_64_16_combine_bulk(t1, j2, f, j, 2); word_slice_64_16_combine_bulk(t2, j2, f, j, 1); word_slice_64_16_combine_bulk(t3, j2, f, j, 0); } r0 = r1 = r2 = r3 = 0; switch(F->x->width - j) { case 16: r0 |= word_slice_64_16(f[j+15]<< 3 & x80008000)>> 0; r1 |= word_slice_64_16(f[j+15]<< 2 & x80008000)>> 0; r2 |= word_slice_64_16(f[j+15]<< 1 & x80008000)>> 0; r3 |= word_slice_64_16(f[j+15]<< 0 & x80008000)>> 0; case 15: r0 |= word_slice_64_16(f[j+14]<< 3 & x80008000)>> 4; r1 |= word_slice_64_16(f[j+14]<< 2 & x80008000)>> 4; r2 |= word_slice_64_16(f[j+14]<< 1 & x80008000)>> 4; r3 |= word_slice_64_16(f[j+14]<< 0 & x80008000)>> 4; case 14: r0 |= word_slice_64_16(f[j+13]<< 3 & x80008000)>> 8; r1 |= word_slice_64_16(f[j+13]<< 2 & x80008000)>> 8; r2 |= word_slice_64_16(f[j+13]<< 1 & x80008000)>> 8; r3 |= word_slice_64_16(f[j+13]<< 0 & x80008000)>> 8; case 13: r0 |= word_slice_64_16(f[j+12]<< 3 & x80008000)>> 12; r1 |= word_slice_64_16(f[j+12]<< 2 & x80008000)>> 12; r2 |= word_slice_64_16(f[j+12]<< 1 & x80008000)>> 12; r3 |= word_slice_64_16(f[j+12]<< 0 & x80008000)>> 12; case 12: r0 |= word_slice_64_16(f[j+11]<< 3 & x80008000)>> 16; r1 |= word_slice_64_16(f[j+11]<< 2 & x80008000)>> 16; r2 |= word_slice_64_16(f[j+11]<< 1 & x80008000)>> 16; r3 |= word_slice_64_16(f[j+11]<< 0 & x80008000)>> 16; case 11: r0 |= word_slice_64_16(f[j+10]<< 3 & x80008000)>> 20; r1 |= word_slice_64_16(f[j+10]<< 2 & x80008000)>> 20; r2 |= word_slice_64_16(f[j+10]<< 1 & x80008000)>> 20; r3 |= word_slice_64_16(f[j+10]<< 0 & x80008000)>> 20; case 10: r0 |= word_slice_64_16(f[j+ 9]<< 3 & x80008000)>> 24; r1 |= word_slice_64_16(f[j+ 9]<< 2 & x80008000)>> 24; r2 |= word_slice_64_16(f[j+ 9]<< 1 & x80008000)>> 24; r3 |= word_slice_64_16(f[j+ 9]<< 0 & x80008000)>> 24; case 9: r0 |= word_slice_64_16(f[j+ 8]<< 3 & x80008000)>> 28; r1 |= word_slice_64_16(f[j+ 8]<< 2 & x80008000)>> 28; r2 |= word_slice_64_16(f[j+ 8]<< 1 & x80008000)>> 28; r3 |= 
word_slice_64_16(f[j+ 8]<< 0 & x80008000)>> 28; case 8: r0 |= word_slice_64_16(f[j+ 7]<< 3 & x80008000)>> 32; r1 |= word_slice_64_16(f[j+ 7]<< 2 & x80008000)>> 32; r2 |= word_slice_64_16(f[j+ 7]<< 1 & x80008000)>> 32; r3 |= word_slice_64_16(f[j+ 7]<< 0 & x80008000)>> 32; case 7: r0 |= word_slice_64_16(f[j+ 6]<< 3 & x80008000)>> 36; r1 |= word_slice_64_16(f[j+ 6]<< 2 & x80008000)>> 36; r2 |= word_slice_64_16(f[j+ 6]<< 1 & x80008000)>> 36; r3 |= word_slice_64_16(f[j+ 6]<< 0 & x80008000)>> 36; case 6: r0 |= word_slice_64_16(f[j+ 5]<< 3 & x80008000)>> 40; r1 |= word_slice_64_16(f[j+ 5]<< 2 & x80008000)>> 40; r2 |= word_slice_64_16(f[j+ 5]<< 1 & x80008000)>> 40; r3 |= word_slice_64_16(f[j+ 5]<< 0 & x80008000)>> 40; case 5: r0 |= word_slice_64_16(f[j+ 4]<< 3 & x80008000)>> 44; r1 |= word_slice_64_16(f[j+ 4]<< 2 & x80008000)>> 44; r2 |= word_slice_64_16(f[j+ 4]<< 1 & x80008000)>> 44; r3 |= word_slice_64_16(f[j+ 4]<< 0 & x80008000)>> 44; case 4: r0 |= word_slice_64_16(f[j+ 3]<< 3 & x80008000)>> 48; r1 |= word_slice_64_16(f[j+ 3]<< 2 & x80008000)>> 48; r2 |= word_slice_64_16(f[j+ 3]<< 1 & x80008000)>> 48; r3 |= word_slice_64_16(f[j+ 3]<< 0 & x80008000)>> 48; case 3: r0 |= word_slice_64_16(f[j+ 2]<< 3 & x80008000)>> 52; r1 |= word_slice_64_16(f[j+ 2]<< 2 & x80008000)>> 52; r2 |= word_slice_64_16(f[j+ 2]<< 1 & x80008000)>> 52; r3 |= word_slice_64_16(f[j+ 2]<< 0 & x80008000)>> 52; case 2: r0 |= word_slice_64_16(f[j+ 1]<< 3 & x80008000)>> 56; r1 |= word_slice_64_16(f[j+ 1]<< 2 & x80008000)>> 56; r2 |= word_slice_64_16(f[j+ 1]<< 1 & x80008000)>> 56; r3 |= word_slice_64_16(f[j+ 1]<< 0 & x80008000)>> 56; case 1: r0 |= word_slice_64_16(f[j+ 0]<< 3 & x80008000)>> 60; r1 |= word_slice_64_16(f[j+ 0]<< 2 & x80008000)>> 60; r2 |= word_slice_64_16(f[j+ 0]<< 1 & x80008000)>> 60; r3 |= word_slice_64_16(f[j+ 0]<< 0 & x80008000)>> 60; break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; t1[j2] |= r1 & bitmask_end; t2[j2] |= r2 & bitmask_end; t3[j2] |= r3 & bitmask_end; } } 
break; case 15: { for(size_t i=0; inrows; i++) { word *t0 = T->x[12]->rows[i]; word *t1 = T->x[13]->rows[i]; word *t2 = T->x[14]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+16 < F->x->width; j+=16,j2++) { word_slice_64_16_combine_bulk(t0, j2, f, j, 3); word_slice_64_16_combine_bulk(t1, j2, f, j, 2); word_slice_64_16_combine_bulk(t2, j2, f, j, 1); } r0 = r1 = r2 = 0; switch(F->x->width - j) { case 16: r0 |= word_slice_64_16(f[j+15]<< 3 & x80008000)>> 0; r1 |= word_slice_64_16(f[j+15]<< 2 & x80008000)>> 0; r2 |= word_slice_64_16(f[j+15]<< 1 & x80008000)>> 0; case 15: r0 |= word_slice_64_16(f[j+14]<< 3 & x80008000)>> 4; r1 |= word_slice_64_16(f[j+14]<< 2 & x80008000)>> 4; r2 |= word_slice_64_16(f[j+14]<< 1 & x80008000)>> 4; case 14: r0 |= word_slice_64_16(f[j+13]<< 3 & x80008000)>> 8; r1 |= word_slice_64_16(f[j+13]<< 2 & x80008000)>> 8; r2 |= word_slice_64_16(f[j+13]<< 1 & x80008000)>> 8; case 13: r0 |= word_slice_64_16(f[j+12]<< 3 & x80008000)>> 12; r1 |= word_slice_64_16(f[j+12]<< 2 & x80008000)>> 12; r2 |= word_slice_64_16(f[j+12]<< 1 & x80008000)>> 12; case 12: r0 |= word_slice_64_16(f[j+11]<< 3 & x80008000)>> 16; r1 |= word_slice_64_16(f[j+11]<< 2 & x80008000)>> 16; r2 |= word_slice_64_16(f[j+11]<< 1 & x80008000)>> 16; case 11: r0 |= word_slice_64_16(f[j+10]<< 3 & x80008000)>> 20; r1 |= word_slice_64_16(f[j+10]<< 2 & x80008000)>> 20; r2 |= word_slice_64_16(f[j+10]<< 1 & x80008000)>> 20; case 10: r0 |= word_slice_64_16(f[j+ 9]<< 3 & x80008000)>> 24; r1 |= word_slice_64_16(f[j+ 9]<< 2 & x80008000)>> 24; r2 |= word_slice_64_16(f[j+ 9]<< 1 & x80008000)>> 24; case 9: r0 |= word_slice_64_16(f[j+ 8]<< 3 & x80008000)>> 28; r1 |= word_slice_64_16(f[j+ 8]<< 2 & x80008000)>> 28; r2 |= word_slice_64_16(f[j+ 8]<< 1 & x80008000)>> 28; case 8: r0 |= word_slice_64_16(f[j+ 7]<< 3 & x80008000)>> 32; r1 |= word_slice_64_16(f[j+ 7]<< 2 & x80008000)>> 32; r2 |= word_slice_64_16(f[j+ 7]<< 1 & x80008000)>> 32; case 7: r0 |= word_slice_64_16(f[j+ 
6]<< 3 & x80008000)>> 36; r1 |= word_slice_64_16(f[j+ 6]<< 2 & x80008000)>> 36; r2 |= word_slice_64_16(f[j+ 6]<< 1 & x80008000)>> 36; case 6: r0 |= word_slice_64_16(f[j+ 5]<< 3 & x80008000)>> 40; r1 |= word_slice_64_16(f[j+ 5]<< 2 & x80008000)>> 40; r2 |= word_slice_64_16(f[j+ 5]<< 1 & x80008000)>> 40; case 5: r0 |= word_slice_64_16(f[j+ 4]<< 3 & x80008000)>> 44; r1 |= word_slice_64_16(f[j+ 4]<< 2 & x80008000)>> 44; r2 |= word_slice_64_16(f[j+ 4]<< 1 & x80008000)>> 44; case 4: r0 |= word_slice_64_16(f[j+ 3]<< 3 & x80008000)>> 48; r1 |= word_slice_64_16(f[j+ 3]<< 2 & x80008000)>> 48; r2 |= word_slice_64_16(f[j+ 3]<< 1 & x80008000)>> 48; case 3: r0 |= word_slice_64_16(f[j+ 2]<< 3 & x80008000)>> 52; r1 |= word_slice_64_16(f[j+ 2]<< 2 & x80008000)>> 52; r2 |= word_slice_64_16(f[j+ 2]<< 1 & x80008000)>> 52; case 2: r0 |= word_slice_64_16(f[j+ 1]<< 3 & x80008000)>> 56; r1 |= word_slice_64_16(f[j+ 1]<< 2 & x80008000)>> 56; r2 |= word_slice_64_16(f[j+ 1]<< 1 & x80008000)>> 56; case 1: r0 |= word_slice_64_16(f[j+ 0]<< 3 & x80008000)>> 60; r1 |= word_slice_64_16(f[j+ 0]<< 2 & x80008000)>> 60; r2 |= word_slice_64_16(f[j+ 0]<< 1 & x80008000)>> 60; break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; t1[j2] |= r1 & bitmask_end; t2[j2] |= r2 & bitmask_end; } } break; case 14: { for(size_t i=0; inrows; i++) { word *t0 = T->x[12]->rows[i]; word *t1 = T->x[13]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+16 < F->x->width; j+=16,j2++) { word_slice_64_16_combine_bulk(t0, j2, f, j, 3); word_slice_64_16_combine_bulk(t1, j2, f, j, 2); } r0 = r1 = 0; switch(F->x->width - j) { case 16: r0 |= word_slice_64_16(f[j+15]<< 3 & x80008000)>> 0; r1 |= word_slice_64_16(f[j+15]<< 2 & x80008000)>> 0; case 15: r0 |= word_slice_64_16(f[j+14]<< 3 & x80008000)>> 4; r1 |= word_slice_64_16(f[j+14]<< 2 & x80008000)>> 4; case 14: r0 |= word_slice_64_16(f[j+13]<< 3 & x80008000)>> 8; r1 |= word_slice_64_16(f[j+13]<< 2 & x80008000)>> 8; case 13: r0 |= 
word_slice_64_16(f[j+12]<< 3 & x80008000)>> 12; r1 |= word_slice_64_16(f[j+12]<< 2 & x80008000)>> 12; case 12: r0 |= word_slice_64_16(f[j+11]<< 3 & x80008000)>> 16; r1 |= word_slice_64_16(f[j+11]<< 2 & x80008000)>> 16; case 11: r0 |= word_slice_64_16(f[j+10]<< 3 & x80008000)>> 20; r1 |= word_slice_64_16(f[j+10]<< 2 & x80008000)>> 20; case 10: r0 |= word_slice_64_16(f[j+ 9]<< 3 & x80008000)>> 24; r1 |= word_slice_64_16(f[j+ 9]<< 2 & x80008000)>> 24; case 9: r0 |= word_slice_64_16(f[j+ 8]<< 3 & x80008000)>> 28; r1 |= word_slice_64_16(f[j+ 8]<< 2 & x80008000)>> 28; case 8: r0 |= word_slice_64_16(f[j+ 7]<< 3 & x80008000)>> 32; r1 |= word_slice_64_16(f[j+ 7]<< 2 & x80008000)>> 32; case 7: r0 |= word_slice_64_16(f[j+ 6]<< 3 & x80008000)>> 36; r1 |= word_slice_64_16(f[j+ 6]<< 2 & x80008000)>> 36; case 6: r0 |= word_slice_64_16(f[j+ 5]<< 3 & x80008000)>> 40; r1 |= word_slice_64_16(f[j+ 5]<< 2 & x80008000)>> 40; case 5: r0 |= word_slice_64_16(f[j+ 4]<< 3 & x80008000)>> 44; r1 |= word_slice_64_16(f[j+ 4]<< 2 & x80008000)>> 44; case 4: r0 |= word_slice_64_16(f[j+ 3]<< 3 & x80008000)>> 48; r1 |= word_slice_64_16(f[j+ 3]<< 2 & x80008000)>> 48; case 3: r0 |= word_slice_64_16(f[j+ 2]<< 3 & x80008000)>> 52; r1 |= word_slice_64_16(f[j+ 2]<< 2 & x80008000)>> 52; case 2: r0 |= word_slice_64_16(f[j+ 1]<< 3 & x80008000)>> 56; r1 |= word_slice_64_16(f[j+ 1]<< 2 & x80008000)>> 56; case 1: r0 |= word_slice_64_16(f[j+ 0]<< 3 & x80008000)>> 60; r1 |= word_slice_64_16(f[j+ 0]<< 2 & x80008000)>> 60; break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; t1[j2] |= r1 & bitmask_end; } } break; case 13: { for(size_t i=0; inrows; i++) { word *t0 = T->x[12]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+16 < F->x->width; j+=16,j2++) { word_slice_64_16_combine_bulk(t0, j2, f, j, 3); } r0 = 0; switch(F->x->width - j) { case 16: r0 |= word_slice_64_16(f[j+15]<< 3 & x80008000)>> 0; case 15: r0 |= word_slice_64_16(f[j+14]<< 3 & x80008000)>> 4; case 14: 
r0 |= word_slice_64_16(f[j+13]<< 3 & x80008000)>> 8; case 13: r0 |= word_slice_64_16(f[j+12]<< 3 & x80008000)>> 12; case 12: r0 |= word_slice_64_16(f[j+11]<< 3 & x80008000)>> 16; case 11: r0 |= word_slice_64_16(f[j+10]<< 3 & x80008000)>> 20; case 10: r0 |= word_slice_64_16(f[j+ 9]<< 3 & x80008000)>> 24; case 9: r0 |= word_slice_64_16(f[j+ 8]<< 3 & x80008000)>> 28; case 8: r0 |= word_slice_64_16(f[j+ 7]<< 3 & x80008000)>> 32; case 7: r0 |= word_slice_64_16(f[j+ 6]<< 3 & x80008000)>> 36; case 6: r0 |= word_slice_64_16(f[j+ 5]<< 3 & x80008000)>> 40; case 5: r0 |= word_slice_64_16(f[j+ 4]<< 3 & x80008000)>> 44; case 4: r0 |= word_slice_64_16(f[j+ 3]<< 3 & x80008000)>> 48; case 3: r0 |= word_slice_64_16(f[j+ 2]<< 3 & x80008000)>> 52; case 2: r0 |= word_slice_64_16(f[j+ 1]<< 3 & x80008000)>> 56; case 1: r0 |= word_slice_64_16(f[j+ 0]<< 3 & x80008000)>> 60; break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; } } break; } } else { switch(T->depth) { case 11: { for(size_t i=0; inrows; i++) { word *t0 = T->x[ 8]->rows[i]; word *t1 = T->x[ 9]->rows[i]; word *t2 = T->x[10]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+16 < F->x->width; j+=16,j2++) { word_slice_64_16_combine_bulk(t0, j2, f, j, 7); word_slice_64_16_combine_bulk(t1, j2, f, j, 6); word_slice_64_16_combine_bulk(t2, j2, f, j, 5); } r0 = r1 = r2 = 0; switch(F->x->width - j) { case 16: r0 |= word_slice_64_16(f[j+15]<< 7 & x80008000)>> 0; r1 |= word_slice_64_16(f[j+15]<< 6 & x80008000)>> 0; r2 |= word_slice_64_16(f[j+15]<< 5 & x80008000)>> 0; case 15: r0 |= word_slice_64_16(f[j+14]<< 7 & x80008000)>> 4; r1 |= word_slice_64_16(f[j+14]<< 6 & x80008000)>> 4; r2 |= word_slice_64_16(f[j+14]<< 5 & x80008000)>> 4; case 14: r0 |= word_slice_64_16(f[j+13]<< 7 & x80008000)>> 8; r1 |= word_slice_64_16(f[j+13]<< 6 & x80008000)>> 8; r2 |= word_slice_64_16(f[j+13]<< 5 & x80008000)>> 8; case 13: r0 |= word_slice_64_16(f[j+12]<< 7 & x80008000)>> 12; r1 |= 
word_slice_64_16(f[j+12]<< 6 & x80008000)>> 12; r2 |= word_slice_64_16(f[j+12]<< 5 & x80008000)>> 12; case 12: r0 |= word_slice_64_16(f[j+11]<< 7 & x80008000)>> 16; r1 |= word_slice_64_16(f[j+11]<< 6 & x80008000)>> 16; r2 |= word_slice_64_16(f[j+11]<< 5 & x80008000)>> 16; case 11: r0 |= word_slice_64_16(f[j+10]<< 7 & x80008000)>> 20; r1 |= word_slice_64_16(f[j+10]<< 6 & x80008000)>> 20; r2 |= word_slice_64_16(f[j+10]<< 5 & x80008000)>> 20; case 10: r0 |= word_slice_64_16(f[j+ 9]<< 7 & x80008000)>> 24; r1 |= word_slice_64_16(f[j+ 9]<< 6 & x80008000)>> 24; r2 |= word_slice_64_16(f[j+ 9]<< 5 & x80008000)>> 24; case 9: r0 |= word_slice_64_16(f[j+ 8]<< 7 & x80008000)>> 28; r1 |= word_slice_64_16(f[j+ 8]<< 6 & x80008000)>> 28; r2 |= word_slice_64_16(f[j+ 8]<< 5 & x80008000)>> 28; case 8: r0 |= word_slice_64_16(f[j+ 7]<< 7 & x80008000)>> 32; r1 |= word_slice_64_16(f[j+ 7]<< 6 & x80008000)>> 32; r2 |= word_slice_64_16(f[j+ 7]<< 5 & x80008000)>> 32; case 7: r0 |= word_slice_64_16(f[j+ 6]<< 7 & x80008000)>> 36; r1 |= word_slice_64_16(f[j+ 6]<< 6 & x80008000)>> 36; r2 |= word_slice_64_16(f[j+ 6]<< 5 & x80008000)>> 36; case 6: r0 |= word_slice_64_16(f[j+ 5]<< 7 & x80008000)>> 40; r1 |= word_slice_64_16(f[j+ 5]<< 6 & x80008000)>> 40; r2 |= word_slice_64_16(f[j+ 5]<< 5 & x80008000)>> 40; case 5: r0 |= word_slice_64_16(f[j+ 4]<< 7 & x80008000)>> 44; r1 |= word_slice_64_16(f[j+ 4]<< 6 & x80008000)>> 44; r2 |= word_slice_64_16(f[j+ 4]<< 5 & x80008000)>> 44; case 4: r0 |= word_slice_64_16(f[j+ 3]<< 7 & x80008000)>> 48; r1 |= word_slice_64_16(f[j+ 3]<< 6 & x80008000)>> 48; r2 |= word_slice_64_16(f[j+ 3]<< 5 & x80008000)>> 48; case 3: r0 |= word_slice_64_16(f[j+ 2]<< 7 & x80008000)>> 52; r1 |= word_slice_64_16(f[j+ 2]<< 6 & x80008000)>> 52; r2 |= word_slice_64_16(f[j+ 2]<< 5 & x80008000)>> 52; case 2: r0 |= word_slice_64_16(f[j+ 1]<< 7 & x80008000)>> 56; r1 |= word_slice_64_16(f[j+ 1]<< 6 & x80008000)>> 56; r2 |= word_slice_64_16(f[j+ 1]<< 5 & x80008000)>> 56; case 1: r0 |= 
word_slice_64_16(f[j+ 0]<< 7 & x80008000)>> 60; r1 |= word_slice_64_16(f[j+ 0]<< 6 & x80008000)>> 60; r2 |= word_slice_64_16(f[j+ 0]<< 5 & x80008000)>> 60; break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; t1[j2] |= r1 & bitmask_end; t2[j2] |= r2 & bitmask_end; } } break; case 10: { for(size_t i=0; inrows; i++) { word *t0 = T->x[ 8]->rows[i]; word *t1 = T->x[ 9]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+16 < F->x->width; j+=16,j2++) { word_slice_64_16_combine_bulk(t0, j2, f, j, 7); word_slice_64_16_combine_bulk(t1, j2, f, j, 6); } r0 = r1 = 0; switch(F->x->width - j) { case 16: r0 |= word_slice_64_16(f[j+15]<< 7 & x80008000)>> 0; r1 |= word_slice_64_16(f[j+15]<< 6 & x80008000)>> 0; case 15: r0 |= word_slice_64_16(f[j+14]<< 7 & x80008000)>> 4; r1 |= word_slice_64_16(f[j+14]<< 6 & x80008000)>> 4; case 14: r0 |= word_slice_64_16(f[j+13]<< 7 & x80008000)>> 8; r1 |= word_slice_64_16(f[j+13]<< 6 & x80008000)>> 8; case 13: r0 |= word_slice_64_16(f[j+12]<< 7 & x80008000)>> 12; r1 |= word_slice_64_16(f[j+12]<< 6 & x80008000)>> 12; case 12: r0 |= word_slice_64_16(f[j+11]<< 7 & x80008000)>> 16; r1 |= word_slice_64_16(f[j+11]<< 6 & x80008000)>> 16; case 11: r0 |= word_slice_64_16(f[j+10]<< 7 & x80008000)>> 20; r1 |= word_slice_64_16(f[j+10]<< 6 & x80008000)>> 20; case 10: r0 |= word_slice_64_16(f[j+ 9]<< 7 & x80008000)>> 24; r1 |= word_slice_64_16(f[j+ 9]<< 6 & x80008000)>> 24; case 9: r0 |= word_slice_64_16(f[j+ 8]<< 7 & x80008000)>> 28; r1 |= word_slice_64_16(f[j+ 8]<< 6 & x80008000)>> 28; case 8: r0 |= word_slice_64_16(f[j+ 7]<< 7 & x80008000)>> 32; r1 |= word_slice_64_16(f[j+ 7]<< 6 & x80008000)>> 32; case 7: r0 |= word_slice_64_16(f[j+ 6]<< 7 & x80008000)>> 36; r1 |= word_slice_64_16(f[j+ 6]<< 6 & x80008000)>> 36; case 6: r0 |= word_slice_64_16(f[j+ 5]<< 7 & x80008000)>> 40; r1 |= word_slice_64_16(f[j+ 5]<< 6 & x80008000)>> 40; case 5: r0 |= word_slice_64_16(f[j+ 4]<< 7 & x80008000)>> 44; r1 |= 
word_slice_64_16(f[j+ 4]<< 6 & x80008000)>> 44; case 4: r0 |= word_slice_64_16(f[j+ 3]<< 7 & x80008000)>> 48; r1 |= word_slice_64_16(f[j+ 3]<< 6 & x80008000)>> 48; case 3: r0 |= word_slice_64_16(f[j+ 2]<< 7 & x80008000)>> 52; r1 |= word_slice_64_16(f[j+ 2]<< 6 & x80008000)>> 52; case 2: r0 |= word_slice_64_16(f[j+ 1]<< 7 & x80008000)>> 56; r1 |= word_slice_64_16(f[j+ 1]<< 6 & x80008000)>> 56; case 1: r0 |= word_slice_64_16(f[j+ 0]<< 7 & x80008000)>> 60; r1 |= word_slice_64_16(f[j+ 0]<< 6 & x80008000)>> 60; break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; t1[j2] |= r1 & bitmask_end; } } break; case 9: { for(size_t i=0; inrows; i++) { word *t0 = T->x[ 8]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+16 < F->x->width; j+=16,j2++) { word_slice_64_16_combine_bulk(t0, j2, f, j, 7); } r0 = 0; switch(F->x->width - j) { case 16: r0 |= word_slice_64_16(f[j+15]<< 7 & x80008000)>> 0; case 15: r0 |= word_slice_64_16(f[j+14]<< 7 & x80008000)>> 4; case 14: r0 |= word_slice_64_16(f[j+13]<< 7 & x80008000)>> 8; case 13: r0 |= word_slice_64_16(f[j+12]<< 7 & x80008000)>> 12; case 12: r0 |= word_slice_64_16(f[j+11]<< 7 & x80008000)>> 16; case 11: r0 |= word_slice_64_16(f[j+10]<< 7 & x80008000)>> 20; case 10: r0 |= word_slice_64_16(f[j+ 9]<< 7 & x80008000)>> 24; case 9: r0 |= word_slice_64_16(f[j+ 8]<< 7 & x80008000)>> 28; case 8: r0 |= word_slice_64_16(f[j+ 7]<< 7 & x80008000)>> 32; case 7: r0 |= word_slice_64_16(f[j+ 6]<< 7 & x80008000)>> 36; case 6: r0 |= word_slice_64_16(f[j+ 5]<< 7 & x80008000)>> 40; case 5: r0 |= word_slice_64_16(f[j+ 4]<< 7 & x80008000)>> 44; case 4: r0 |= word_slice_64_16(f[j+ 3]<< 7 & x80008000)>> 48; case 3: r0 |= word_slice_64_16(f[j+ 2]<< 7 & x80008000)>> 52; case 2: r0 |= word_slice_64_16(f[j+ 1]<< 7 & x80008000)>> 56; case 1: r0 |= word_slice_64_16(f[j+ 0]<< 7 & x80008000)>> 60; break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; } } break; default: m4ri_die("impossible"); } } 
return T; } libm4rie-20130416/src/conversion_slice8.c000066400000000000000000000716601212302364300200600ustar00rootroot00000000000000/****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010-2013 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include "conversion.h" static inline word word_slice_64_08(word a) { a = (a & xff00ff00) | (a & xff00ff00>> 8)<< 7; a = (a & xffff0000) | (a & xffff0000>>16)<<14; a = (a & xffffffff) | (a & xffffffff>>32)<<28; return a; } mzd_slice_t *_mzed_slice8(mzd_slice_t *T, const mzed_t *F) { assert(T && (4 < T->depth && T->depth <= 8) && T->x[0]->offset == 0); size_t j, j2 = 0; register word r0,r1,r2,r3,r4,r5,r6,r7 = 0; const word bitmask_end = __M4RI_LEFT_BITMASK((T->x[0]->offset + T->ncols) % m4ri_radix); if (mzed_is_zero(F)) return T; switch(T->depth) { case 8: { for(size_t i=0; inrows; i++) { word *t0 = T->x[0]->rows[i]; word *t1 = T->x[1]->rows[i]; word *t2 = T->x[2]->rows[i]; word *t3 = T->x[3]->rows[i]; word *t4 = T->x[4]->rows[i]; word *t5 = T->x[5]->rows[i]; word *t6 = T->x[6]->rows[i]; word *t7 = T->x[7]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+8 < F->x->width; j+=8,j2++) { t0[j2] |= word_slice_64_08(f[j+0]<<7 & x80808080)>>56 | word_slice_64_08(f[j+1]<<7 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<7 & x80808080)>>40 | word_slice_64_08(f[j+3]<<7 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<7 & x80808080)>>24 | word_slice_64_08(f[j+5]<<7 & 
x80808080)>>16 \ | word_slice_64_08(f[j+6]<<7 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<7 & x80808080)>> 0; t1[j2] |= word_slice_64_08(f[j+0]<<6 & x80808080)>>56 | word_slice_64_08(f[j+1]<<6 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<6 & x80808080)>>40 | word_slice_64_08(f[j+3]<<6 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<6 & x80808080)>>24 | word_slice_64_08(f[j+5]<<6 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<6 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<6 & x80808080)>> 0; t2[j2] |= word_slice_64_08(f[j+0]<<5 & x80808080)>>56 | word_slice_64_08(f[j+1]<<5 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<5 & x80808080)>>40 | word_slice_64_08(f[j+3]<<5 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<5 & x80808080)>>24 | word_slice_64_08(f[j+5]<<5 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<5 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<5 & x80808080)>> 0; t3[j2] |= word_slice_64_08(f[j+0]<<4 & x80808080)>>56 | word_slice_64_08(f[j+1]<<4 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<4 & x80808080)>>40 | word_slice_64_08(f[j+3]<<4 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<4 & x80808080)>>24 | word_slice_64_08(f[j+5]<<4 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<4 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<4 & x80808080)>> 0; t4[j2] |= word_slice_64_08(f[j+0]<<3 & x80808080)>>56 | word_slice_64_08(f[j+1]<<3 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<3 & x80808080)>>40 | word_slice_64_08(f[j+3]<<3 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<3 & x80808080)>>24 | word_slice_64_08(f[j+5]<<3 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<3 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<3 & x80808080)>> 0; t5[j2] |= word_slice_64_08(f[j+0]<<2 & x80808080)>>56 | word_slice_64_08(f[j+1]<<2 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<2 & x80808080)>>40 | word_slice_64_08(f[j+3]<<2 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<2 & x80808080)>>24 | word_slice_64_08(f[j+5]<<2 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<2 & x80808080)>> 8 | 
word_slice_64_08(f[j+7]<<2 & x80808080)>> 0; t6[j2] |= word_slice_64_08(f[j+0]<<1 & x80808080)>>56 | word_slice_64_08(f[j+1]<<1 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<1 & x80808080)>>40 | word_slice_64_08(f[j+3]<<1 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<1 & x80808080)>>24 | word_slice_64_08(f[j+5]<<1 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<1 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<1 & x80808080)>> 0; t7[j2] |= word_slice_64_08(f[j+0]<<0 & x80808080)>>56 | word_slice_64_08(f[j+1]<<0 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<0 & x80808080)>>40 | word_slice_64_08(f[j+3]<<0 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<0 & x80808080)>>24 | word_slice_64_08(f[j+5]<<0 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<0 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<0 & x80808080)>> 0; } r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = 0; switch(F->x->width - j) { case 8: r0 |= word_slice_64_08(f[j+7]<<7 & x80808080)>> 0; r1 |= word_slice_64_08(f[j+7]<<6 & x80808080)>> 0; r2 |= word_slice_64_08(f[j+7]<<5 & x80808080)>> 0; r3 |= word_slice_64_08(f[j+7]<<4 & x80808080)>> 0; r4 |= word_slice_64_08(f[j+7]<<3 & x80808080)>> 0; r5 |= word_slice_64_08(f[j+7]<<2 & x80808080)>> 0; r6 |= word_slice_64_08(f[j+7]<<1 & x80808080)>> 0; r7 |= word_slice_64_08(f[j+7]<<0 & x80808080)>> 0; case 7: r0 |= word_slice_64_08(f[j+6]<<7 & x80808080)>> 8; r1 |= word_slice_64_08(f[j+6]<<6 & x80808080)>> 8; r2 |= word_slice_64_08(f[j+6]<<5 & x80808080)>> 8; r3 |= word_slice_64_08(f[j+6]<<4 & x80808080)>> 8; r4 |= word_slice_64_08(f[j+6]<<3 & x80808080)>> 8; r5 |= word_slice_64_08(f[j+6]<<2 & x80808080)>> 8; r6 |= word_slice_64_08(f[j+6]<<1 & x80808080)>> 8; r7 |= word_slice_64_08(f[j+6]<<0 & x80808080)>> 8; case 6: r0 |= word_slice_64_08(f[j+5]<<7 & x80808080)>>16; r1 |= word_slice_64_08(f[j+5]<<6 & x80808080)>>16; r2 |= word_slice_64_08(f[j+5]<<5 & x80808080)>>16; r3 |= word_slice_64_08(f[j+5]<<4 & x80808080)>>16; r4 |= word_slice_64_08(f[j+5]<<3 & x80808080)>>16; r5 |= 
word_slice_64_08(f[j+5]<<2 & x80808080)>>16; r6 |= word_slice_64_08(f[j+5]<<1 & x80808080)>>16; r7 |= word_slice_64_08(f[j+5]<<0 & x80808080)>>16; case 5: r0 |= word_slice_64_08(f[j+4]<<7 & x80808080)>>24; r1 |= word_slice_64_08(f[j+4]<<6 & x80808080)>>24; r2 |= word_slice_64_08(f[j+4]<<5 & x80808080)>>24; r3 |= word_slice_64_08(f[j+4]<<4 & x80808080)>>24; r4 |= word_slice_64_08(f[j+4]<<3 & x80808080)>>24; r5 |= word_slice_64_08(f[j+4]<<2 & x80808080)>>24; r6 |= word_slice_64_08(f[j+4]<<1 & x80808080)>>24; r7 |= word_slice_64_08(f[j+4]<<0 & x80808080)>>24; case 4: r0 |= word_slice_64_08(f[j+3]<<7 & x80808080)>>32; r1 |= word_slice_64_08(f[j+3]<<6 & x80808080)>>32; r2 |= word_slice_64_08(f[j+3]<<5 & x80808080)>>32; r3 |= word_slice_64_08(f[j+3]<<4 & x80808080)>>32; r4 |= word_slice_64_08(f[j+3]<<3 & x80808080)>>32; r5 |= word_slice_64_08(f[j+3]<<2 & x80808080)>>32; r6 |= word_slice_64_08(f[j+3]<<1 & x80808080)>>32; r7 |= word_slice_64_08(f[j+3]<<0 & x80808080)>>32; case 3: r0 |= word_slice_64_08(f[j+2]<<7 & x80808080)>>40; r1 |= word_slice_64_08(f[j+2]<<6 & x80808080)>>40; r2 |= word_slice_64_08(f[j+2]<<5 & x80808080)>>40; r3 |= word_slice_64_08(f[j+2]<<4 & x80808080)>>40; r4 |= word_slice_64_08(f[j+2]<<3 & x80808080)>>40; r5 |= word_slice_64_08(f[j+2]<<2 & x80808080)>>40; r6 |= word_slice_64_08(f[j+2]<<1 & x80808080)>>40; r7 |= word_slice_64_08(f[j+2]<<0 & x80808080)>>40; case 2: r0 |= word_slice_64_08(f[j+1]<<7 & x80808080)>>48; r1 |= word_slice_64_08(f[j+1]<<6 & x80808080)>>48; r2 |= word_slice_64_08(f[j+1]<<5 & x80808080)>>48; r3 |= word_slice_64_08(f[j+1]<<4 & x80808080)>>48; r4 |= word_slice_64_08(f[j+1]<<3 & x80808080)>>48; r5 |= word_slice_64_08(f[j+1]<<2 & x80808080)>>48; r6 |= word_slice_64_08(f[j+1]<<1 & x80808080)>>48; r7 |= word_slice_64_08(f[j+1]<<0 & x80808080)>>48; case 1: r0 |= word_slice_64_08(f[j+0]<<7 & x80808080)>>56; r1 |= word_slice_64_08(f[j+0]<<6 & x80808080)>>56; r2 |= word_slice_64_08(f[j+0]<<5 & x80808080)>>56; r3 |= 
word_slice_64_08(f[j+0]<<4 & x80808080)>>56; r4 |= word_slice_64_08(f[j+0]<<3 & x80808080)>>56; r5 |= word_slice_64_08(f[j+0]<<2 & x80808080)>>56; r6 |= word_slice_64_08(f[j+0]<<1 & x80808080)>>56; r7 |= word_slice_64_08(f[j+0]<<0 & x80808080)>>56; break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; t1[j2] |= r1 & bitmask_end; t2[j2] |= r2 & bitmask_end; t3[j2] |= r3 & bitmask_end; t4[j2] |= r4 & bitmask_end; t5[j2] |= r5 & bitmask_end; t6[j2] |= r6 & bitmask_end; t7[j2] |= r7 & bitmask_end; } } break; case 7: { for(size_t i=0; inrows; i++) { word *t0 = T->x[0]->rows[i]; word *t1 = T->x[1]->rows[i]; word *t2 = T->x[2]->rows[i]; word *t3 = T->x[3]->rows[i]; word *t4 = T->x[4]->rows[i]; word *t5 = T->x[5]->rows[i]; word *t6 = T->x[6]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+8 < F->x->width; j+=8,j2++) { t0[j2] |= word_slice_64_08(f[j+0]<<7 & x80808080)>>56 | word_slice_64_08(f[j+1]<<7 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<7 & x80808080)>>40 | word_slice_64_08(f[j+3]<<7 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<7 & x80808080)>>24 | word_slice_64_08(f[j+5]<<7 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<7 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<7 & x80808080)>> 0; t1[j2] |= word_slice_64_08(f[j+0]<<6 & x80808080)>>56 | word_slice_64_08(f[j+1]<<6 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<6 & x80808080)>>40 | word_slice_64_08(f[j+3]<<6 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<6 & x80808080)>>24 | word_slice_64_08(f[j+5]<<6 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<6 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<6 & x80808080)>> 0; t2[j2] |= word_slice_64_08(f[j+0]<<5 & x80808080)>>56 | word_slice_64_08(f[j+1]<<5 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<5 & x80808080)>>40 | word_slice_64_08(f[j+3]<<5 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<5 & x80808080)>>24 | word_slice_64_08(f[j+5]<<5 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<5 & x80808080)>> 8 | 
word_slice_64_08(f[j+7]<<5 & x80808080)>> 0; t3[j2] |= word_slice_64_08(f[j+0]<<4 & x80808080)>>56 | word_slice_64_08(f[j+1]<<4 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<4 & x80808080)>>40 | word_slice_64_08(f[j+3]<<4 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<4 & x80808080)>>24 | word_slice_64_08(f[j+5]<<4 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<4 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<4 & x80808080)>> 0; t4[j2] |= word_slice_64_08(f[j+0]<<3 & x80808080)>>56 | word_slice_64_08(f[j+1]<<3 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<3 & x80808080)>>40 | word_slice_64_08(f[j+3]<<3 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<3 & x80808080)>>24 | word_slice_64_08(f[j+5]<<3 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<3 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<3 & x80808080)>> 0; t5[j2] |= word_slice_64_08(f[j+0]<<2 & x80808080)>>56 | word_slice_64_08(f[j+1]<<2 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<2 & x80808080)>>40 | word_slice_64_08(f[j+3]<<2 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<2 & x80808080)>>24 | word_slice_64_08(f[j+5]<<2 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<2 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<2 & x80808080)>> 0; t6[j2] |= word_slice_64_08(f[j+0]<<1 & x80808080)>>56 | word_slice_64_08(f[j+1]<<1 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<1 & x80808080)>>40 | word_slice_64_08(f[j+3]<<1 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<1 & x80808080)>>24 | word_slice_64_08(f[j+5]<<1 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<1 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<1 & x80808080)>> 0; } r0 = r1 = r2 = r3 = r4 = r5 = r6 = 0; switch(F->x->width - j) { case 8: r0 |= word_slice_64_08(f[j+7]<<7 & x80808080)>> 0; r1 |= word_slice_64_08(f[j+7]<<6 & x80808080)>> 0; r2 |= word_slice_64_08(f[j+7]<<5 & x80808080)>> 0; r3 |= word_slice_64_08(f[j+7]<<4 & x80808080)>> 0; r4 |= word_slice_64_08(f[j+7]<<3 & x80808080)>> 0; r5 |= word_slice_64_08(f[j+7]<<2 & x80808080)>> 0; r6 |= word_slice_64_08(f[j+7]<<1 & 
x80808080)>> 0; case 7: r0 |= word_slice_64_08(f[j+6]<<7 & x80808080)>> 8; r1 |= word_slice_64_08(f[j+6]<<6 & x80808080)>> 8; r2 |= word_slice_64_08(f[j+6]<<5 & x80808080)>> 8; r3 |= word_slice_64_08(f[j+6]<<4 & x80808080)>> 8; r4 |= word_slice_64_08(f[j+6]<<3 & x80808080)>> 8; r5 |= word_slice_64_08(f[j+6]<<2 & x80808080)>> 8; r6 |= word_slice_64_08(f[j+6]<<1 & x80808080)>> 8; case 6: r0 |= word_slice_64_08(f[j+5]<<7 & x80808080)>>16; r1 |= word_slice_64_08(f[j+5]<<6 & x80808080)>>16; r2 |= word_slice_64_08(f[j+5]<<5 & x80808080)>>16; r3 |= word_slice_64_08(f[j+5]<<4 & x80808080)>>16; r4 |= word_slice_64_08(f[j+5]<<3 & x80808080)>>16; r5 |= word_slice_64_08(f[j+5]<<2 & x80808080)>>16; r6 |= word_slice_64_08(f[j+5]<<1 & x80808080)>>16; case 5: r0 |= word_slice_64_08(f[j+4]<<7 & x80808080)>>24; r1 |= word_slice_64_08(f[j+4]<<6 & x80808080)>>24; r2 |= word_slice_64_08(f[j+4]<<5 & x80808080)>>24; r3 |= word_slice_64_08(f[j+4]<<4 & x80808080)>>24; r4 |= word_slice_64_08(f[j+4]<<3 & x80808080)>>24; r5 |= word_slice_64_08(f[j+4]<<2 & x80808080)>>24; r6 |= word_slice_64_08(f[j+4]<<1 & x80808080)>>24; case 4: r0 |= word_slice_64_08(f[j+3]<<7 & x80808080)>>32; r1 |= word_slice_64_08(f[j+3]<<6 & x80808080)>>32; r2 |= word_slice_64_08(f[j+3]<<5 & x80808080)>>32; r3 |= word_slice_64_08(f[j+3]<<4 & x80808080)>>32; r4 |= word_slice_64_08(f[j+3]<<3 & x80808080)>>32; r5 |= word_slice_64_08(f[j+3]<<2 & x80808080)>>32; r6 |= word_slice_64_08(f[j+3]<<1 & x80808080)>>32; case 3: r0 |= word_slice_64_08(f[j+2]<<7 & x80808080)>>40; r1 |= word_slice_64_08(f[j+2]<<6 & x80808080)>>40; r2 |= word_slice_64_08(f[j+2]<<5 & x80808080)>>40; r3 |= word_slice_64_08(f[j+2]<<4 & x80808080)>>40; r4 |= word_slice_64_08(f[j+2]<<3 & x80808080)>>40; r5 |= word_slice_64_08(f[j+2]<<2 & x80808080)>>40; r6 |= word_slice_64_08(f[j+2]<<1 & x80808080)>>40; case 2: r0 |= word_slice_64_08(f[j+1]<<7 & x80808080)>>48; r1 |= word_slice_64_08(f[j+1]<<6 & x80808080)>>48; r2 |= word_slice_64_08(f[j+1]<<5 & 
x80808080)>>48; r3 |= word_slice_64_08(f[j+1]<<4 & x80808080)>>48; r4 |= word_slice_64_08(f[j+1]<<3 & x80808080)>>48; r5 |= word_slice_64_08(f[j+1]<<2 & x80808080)>>48; r6 |= word_slice_64_08(f[j+1]<<1 & x80808080)>>48; case 1: r0 |= word_slice_64_08(f[j+0]<<7 & x80808080)>>56; r1 |= word_slice_64_08(f[j+0]<<6 & x80808080)>>56; r2 |= word_slice_64_08(f[j+0]<<5 & x80808080)>>56; r3 |= word_slice_64_08(f[j+0]<<4 & x80808080)>>56; r4 |= word_slice_64_08(f[j+0]<<3 & x80808080)>>56; r5 |= word_slice_64_08(f[j+0]<<2 & x80808080)>>56; r6 |= word_slice_64_08(f[j+0]<<1 & x80808080)>>56; break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; t1[j2] |= r1 & bitmask_end; t2[j2] |= r2 & bitmask_end; t3[j2] |= r3 & bitmask_end; t4[j2] |= r4 & bitmask_end; t5[j2] |= r5 & bitmask_end; t6[j2] |= r6 & bitmask_end; } } break; case 6: { for(size_t i=0; inrows; i++) { word *t0 = T->x[0]->rows[i]; word *t1 = T->x[1]->rows[i]; word *t2 = T->x[2]->rows[i]; word *t3 = T->x[3]->rows[i]; word *t4 = T->x[4]->rows[i]; word *t5 = T->x[5]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+8 < F->x->width; j+=8,j2++) { t0[j2] |= word_slice_64_08(f[j+0]<<7 & x80808080)>>56 | word_slice_64_08(f[j+1]<<7 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<7 & x80808080)>>40 | word_slice_64_08(f[j+3]<<7 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<7 & x80808080)>>24 | word_slice_64_08(f[j+5]<<7 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<7 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<7 & x80808080)>> 0; t1[j2] |= word_slice_64_08(f[j+0]<<6 & x80808080)>>56 | word_slice_64_08(f[j+1]<<6 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<6 & x80808080)>>40 | word_slice_64_08(f[j+3]<<6 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<6 & x80808080)>>24 | word_slice_64_08(f[j+5]<<6 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<6 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<6 & x80808080)>> 0; t2[j2] |= word_slice_64_08(f[j+0]<<5 & x80808080)>>56 | 
word_slice_64_08(f[j+1]<<5 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<5 & x80808080)>>40 | word_slice_64_08(f[j+3]<<5 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<5 & x80808080)>>24 | word_slice_64_08(f[j+5]<<5 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<5 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<5 & x80808080)>> 0; t3[j2] |= word_slice_64_08(f[j+0]<<4 & x80808080)>>56 | word_slice_64_08(f[j+1]<<4 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<4 & x80808080)>>40 | word_slice_64_08(f[j+3]<<4 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<4 & x80808080)>>24 | word_slice_64_08(f[j+5]<<4 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<4 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<4 & x80808080)>> 0; t4[j2] |= word_slice_64_08(f[j+0]<<3 & x80808080)>>56 | word_slice_64_08(f[j+1]<<3 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<3 & x80808080)>>40 | word_slice_64_08(f[j+3]<<3 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<3 & x80808080)>>24 | word_slice_64_08(f[j+5]<<3 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<3 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<3 & x80808080)>> 0; t5[j2] |= word_slice_64_08(f[j+0]<<2 & x80808080)>>56 | word_slice_64_08(f[j+1]<<2 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<2 & x80808080)>>40 | word_slice_64_08(f[j+3]<<2 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<2 & x80808080)>>24 | word_slice_64_08(f[j+5]<<2 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<2 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<2 & x80808080)>> 0; } r0 = r1 = r2 = r3 = r4 = r5 = 0; switch(F->x->width - j) { case 8: r0 |= word_slice_64_08(f[j+7]<<7 & x80808080)>> 0; r1 |= word_slice_64_08(f[j+7]<<6 & x80808080)>> 0; r2 |= word_slice_64_08(f[j+7]<<5 & x80808080)>> 0; r3 |= word_slice_64_08(f[j+7]<<4 & x80808080)>> 0; r4 |= word_slice_64_08(f[j+7]<<3 & x80808080)>> 0; r5 |= word_slice_64_08(f[j+7]<<2 & x80808080)>> 0; case 7: r0 |= word_slice_64_08(f[j+6]<<7 & x80808080)>> 8; r1 |= word_slice_64_08(f[j+6]<<6 & x80808080)>> 8; r2 |= word_slice_64_08(f[j+6]<<5 & 
x80808080)>> 8; r3 |= word_slice_64_08(f[j+6]<<4 & x80808080)>> 8; r4 |= word_slice_64_08(f[j+6]<<3 & x80808080)>> 8; r5 |= word_slice_64_08(f[j+6]<<2 & x80808080)>> 8; case 6: r0 |= word_slice_64_08(f[j+5]<<7 & x80808080)>>16; r1 |= word_slice_64_08(f[j+5]<<6 & x80808080)>>16; r2 |= word_slice_64_08(f[j+5]<<5 & x80808080)>>16; r3 |= word_slice_64_08(f[j+5]<<4 & x80808080)>>16; r4 |= word_slice_64_08(f[j+5]<<3 & x80808080)>>16; r5 |= word_slice_64_08(f[j+5]<<2 & x80808080)>>16; case 5: r0 |= word_slice_64_08(f[j+4]<<7 & x80808080)>>24; r1 |= word_slice_64_08(f[j+4]<<6 & x80808080)>>24; r2 |= word_slice_64_08(f[j+4]<<5 & x80808080)>>24; r3 |= word_slice_64_08(f[j+4]<<4 & x80808080)>>24; r4 |= word_slice_64_08(f[j+4]<<3 & x80808080)>>24; r5 |= word_slice_64_08(f[j+4]<<2 & x80808080)>>24; case 4: r0 |= word_slice_64_08(f[j+3]<<7 & x80808080)>>32; r1 |= word_slice_64_08(f[j+3]<<6 & x80808080)>>32; r2 |= word_slice_64_08(f[j+3]<<5 & x80808080)>>32; r3 |= word_slice_64_08(f[j+3]<<4 & x80808080)>>32; r4 |= word_slice_64_08(f[j+3]<<3 & x80808080)>>32; r5 |= word_slice_64_08(f[j+3]<<2 & x80808080)>>32; case 3: r0 |= word_slice_64_08(f[j+2]<<7 & x80808080)>>40; r1 |= word_slice_64_08(f[j+2]<<6 & x80808080)>>40; r2 |= word_slice_64_08(f[j+2]<<5 & x80808080)>>40; r3 |= word_slice_64_08(f[j+2]<<4 & x80808080)>>40; r4 |= word_slice_64_08(f[j+2]<<3 & x80808080)>>40; r5 |= word_slice_64_08(f[j+2]<<2 & x80808080)>>40; case 2: r0 |= word_slice_64_08(f[j+1]<<7 & x80808080)>>48; r1 |= word_slice_64_08(f[j+1]<<6 & x80808080)>>48; r2 |= word_slice_64_08(f[j+1]<<5 & x80808080)>>48; r3 |= word_slice_64_08(f[j+1]<<4 & x80808080)>>48; r4 |= word_slice_64_08(f[j+1]<<3 & x80808080)>>48; r5 |= word_slice_64_08(f[j+1]<<2 & x80808080)>>48; case 1: r0 |= word_slice_64_08(f[j+0]<<7 & x80808080)>>56; r1 |= word_slice_64_08(f[j+0]<<6 & x80808080)>>56; r2 |= word_slice_64_08(f[j+0]<<5 & x80808080)>>56; r3 |= word_slice_64_08(f[j+0]<<4 & x80808080)>>56; r4 |= word_slice_64_08(f[j+0]<<3 & 
x80808080)>>56; r5 |= word_slice_64_08(f[j+0]<<2 & x80808080)>>56; break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; t1[j2] |= r1 & bitmask_end; t2[j2] |= r2 & bitmask_end; t3[j2] |= r3 & bitmask_end; t4[j2] |= r4 & bitmask_end; t5[j2] |= r5 & bitmask_end; } } break; case 5: { for(size_t i=0; inrows; i++) { word *t0 = T->x[0]->rows[i]; word *t1 = T->x[1]->rows[i]; word *t2 = T->x[2]->rows[i]; word *t3 = T->x[3]->rows[i]; word *t4 = T->x[4]->rows[i]; const word const *f = F->x->rows[i]; /* bulk of work */ for(j=0, j2=0; j+8 < F->x->width; j+=8,j2++) { t0[j2] |= word_slice_64_08(f[j+0]<<7 & x80808080)>>56 | word_slice_64_08(f[j+1]<<7 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<7 & x80808080)>>40 | word_slice_64_08(f[j+3]<<7 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<7 & x80808080)>>24 | word_slice_64_08(f[j+5]<<7 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<7 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<7 & x80808080)>> 0; t1[j2] |= word_slice_64_08(f[j+0]<<6 & x80808080)>>56 | word_slice_64_08(f[j+1]<<6 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<6 & x80808080)>>40 | word_slice_64_08(f[j+3]<<6 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<6 & x80808080)>>24 | word_slice_64_08(f[j+5]<<6 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<6 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<6 & x80808080)>> 0; t2[j2] |= word_slice_64_08(f[j+0]<<5 & x80808080)>>56 | word_slice_64_08(f[j+1]<<5 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<5 & x80808080)>>40 | word_slice_64_08(f[j+3]<<5 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<5 & x80808080)>>24 | word_slice_64_08(f[j+5]<<5 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<5 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<5 & x80808080)>> 0; t3[j2] |= word_slice_64_08(f[j+0]<<4 & x80808080)>>56 | word_slice_64_08(f[j+1]<<4 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<4 & x80808080)>>40 | word_slice_64_08(f[j+3]<<4 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<4 & x80808080)>>24 | word_slice_64_08(f[j+5]<<4 & 
x80808080)>>16 \ | word_slice_64_08(f[j+6]<<4 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<4 & x80808080)>> 0; t4[j2] |= word_slice_64_08(f[j+0]<<3 & x80808080)>>56 | word_slice_64_08(f[j+1]<<3 & x80808080)>>48 \ | word_slice_64_08(f[j+2]<<3 & x80808080)>>40 | word_slice_64_08(f[j+3]<<3 & x80808080)>>32 \ | word_slice_64_08(f[j+4]<<3 & x80808080)>>24 | word_slice_64_08(f[j+5]<<3 & x80808080)>>16 \ | word_slice_64_08(f[j+6]<<3 & x80808080)>> 8 | word_slice_64_08(f[j+7]<<3 & x80808080)>> 0; } r0 = r1 = r2 = r3 = r4 = 0; switch(F->x->width - j) { case 8: r0 |= word_slice_64_08(f[j+7]<<7 & x80808080)>> 0; r1 |= word_slice_64_08(f[j+7]<<6 & x80808080)>> 0; r2 |= word_slice_64_08(f[j+7]<<5 & x80808080)>> 0; r3 |= word_slice_64_08(f[j+7]<<4 & x80808080)>> 0; r4 |= word_slice_64_08(f[j+7]<<3 & x80808080)>> 0; case 7: r0 |= word_slice_64_08(f[j+6]<<7 & x80808080)>> 8; r1 |= word_slice_64_08(f[j+6]<<6 & x80808080)>> 8; r2 |= word_slice_64_08(f[j+6]<<5 & x80808080)>> 8; r3 |= word_slice_64_08(f[j+6]<<4 & x80808080)>> 8; r4 |= word_slice_64_08(f[j+6]<<3 & x80808080)>> 8; case 6: r0 |= word_slice_64_08(f[j+5]<<7 & x80808080)>>16; r1 |= word_slice_64_08(f[j+5]<<6 & x80808080)>>16; r2 |= word_slice_64_08(f[j+5]<<5 & x80808080)>>16; r3 |= word_slice_64_08(f[j+5]<<4 & x80808080)>>16; r4 |= word_slice_64_08(f[j+5]<<3 & x80808080)>>16; case 5: r0 |= word_slice_64_08(f[j+4]<<7 & x80808080)>>24; r1 |= word_slice_64_08(f[j+4]<<6 & x80808080)>>24; r2 |= word_slice_64_08(f[j+4]<<5 & x80808080)>>24; r3 |= word_slice_64_08(f[j+4]<<4 & x80808080)>>24; r4 |= word_slice_64_08(f[j+4]<<3 & x80808080)>>24; case 4: r0 |= word_slice_64_08(f[j+3]<<7 & x80808080)>>32; r1 |= word_slice_64_08(f[j+3]<<6 & x80808080)>>32; r2 |= word_slice_64_08(f[j+3]<<5 & x80808080)>>32; r3 |= word_slice_64_08(f[j+3]<<4 & x80808080)>>32; r4 |= word_slice_64_08(f[j+3]<<3 & x80808080)>>32; case 3: r0 |= word_slice_64_08(f[j+2]<<7 & x80808080)>>40; r1 |= word_slice_64_08(f[j+2]<<6 & x80808080)>>40; r2 |= 
word_slice_64_08(f[j+2]<<5 & x80808080)>>40; r3 |= word_slice_64_08(f[j+2]<<4 & x80808080)>>40; r4 |= word_slice_64_08(f[j+2]<<3 & x80808080)>>40; case 2: r0 |= word_slice_64_08(f[j+1]<<7 & x80808080)>>48; r1 |= word_slice_64_08(f[j+1]<<6 & x80808080)>>48; r2 |= word_slice_64_08(f[j+1]<<5 & x80808080)>>48; r3 |= word_slice_64_08(f[j+1]<<4 & x80808080)>>48; r4 |= word_slice_64_08(f[j+1]<<3 & x80808080)>>48; case 1: r0 |= word_slice_64_08(f[j+0]<<7 & x80808080)>>56; r1 |= word_slice_64_08(f[j+0]<<6 & x80808080)>>56; r2 |= word_slice_64_08(f[j+0]<<5 & x80808080)>>56; r3 |= word_slice_64_08(f[j+0]<<4 & x80808080)>>56; r4 |= word_slice_64_08(f[j+0]<<3 & x80808080)>>56; break; default: m4ri_die("impossible"); } t0[j2] |= r0 & bitmask_end; t1[j2] |= r1 & bitmask_end; t2[j2] |= r2 & bitmask_end; t3[j2] |= r3 & bitmask_end; t4[j2] |= r4 & bitmask_end; } } break; default: m4ri_die("impossible\n"); } return T; } libm4rie-20130416/src/echelonform.c000066400000000000000000000067731212302364300167300ustar00rootroot00000000000000/****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include "echelonform.h" #include "newton_john.h" #include "permutation.h" #include "trsm.h" #include "ple.h" rci_t mzed_echelonize(mzed_t *A, int full) { if (A->finite_field->degree > A->nrows) { return mzed_echelonize_naive(A, full); } else if ((A->nrows * A->ncols * A->w <= 2*__M4RIE_PLE_CUTOFF)) { return mzed_echelonize_newton_john(A, full); } else { return mzed_echelonize_ple(A, full); } } rci_t mzd_slice_echelonize_ple(mzd_slice_t *A, int full) { mzp_t *P = mzp_init(A->nrows); mzp_t *Q = mzp_init(A->ncols); rci_t r; if(full) { r = mzd_slice_pluq(A, P, Q); mzd_slice_t *U = mzd_slice_init_window(A, 0, 0, r, r); const rci_t r_radix = m4ri_radix*(r/m4ri_radix); if(r_radix == r && r!=A->ncols) { mzd_slice_t *B = mzd_slice_init_window(A, 0, r, r, A->ncols); for(rci_t i = 0; i < r; ++i) mzd_slice_write_elem(U, i, i, 1); mzd_slice_trsm_upper_left(U, B); mzd_slice_free_window(B); } else if (r_radix != r && r!=A->ncols) { assert(r_radix < r); if(A->ncols > r_radix+m4ri_radix) { mzd_slice_t *B0 = mzd_slice_submatrix(NULL, A, 0, r_radix, r, r_radix+m4ri_radix); mzd_slice_t *B0w = mzd_slice_init_window( A, 0, r_radix, r, r_radix+m4ri_radix); mzd_slice_t *B1 = mzd_slice_init_window(A, 0, r_radix+m4ri_radix, r, A->ncols); for(rci_t i = 0; i < r; ++i) mzd_slice_write_elem(U, i, i, 1); mzd_slice_trsm_upper_left(U, B0); mzd_slice_trsm_upper_left(U, B1); mzd_slice_copy(B0w, B0); mzd_slice_free(B0); mzd_slice_free_window(B0w); mzd_slice_free_window(B1); } else { mzd_slice_t *B = mzd_slice_submatrix(NULL, A, 0, r_radix, r, A->ncols); mzd_slice_t *Bw = mzd_slice_init_window(A, 0, r_radix, r, A->ncols); for(rci_t i = 0; i < r; ++i) mzd_slice_write_elem(U, i, i, 1); mzd_slice_trsm_upper_left(U, B); mzd_slice_copy(Bw, B); mzd_slice_free_window(Bw); mzd_slice_free(B); } } mzd_slice_set_ui(U, 1); mzd_slice_free_window(U); if(r) { 
mzd_slice_t *A0 = mzd_slice_init_window(A, 0, 0, r, A->ncols); mzd_slice_apply_p_right(A0, Q); mzd_slice_free_window(A0); } } else { r = mzd_slice_ple(A, P, Q); for(rci_t i = 0; i < r; ++i) { for(int e=0; e < A->depth; e++) { for(rci_t j = 0; j <= i; j++) { int const length = MIN(m4ri_radix, i - j + 1); mzd_clear_bits(A->x[e], i, j, length); } } mzd_slice_write_elem(A, i, Q->values[i], 1); } } if(r != A->nrows) { mzd_slice_t *R = mzd_slice_init_window(A, r, 0, A->nrows, A->ncols); mzd_slice_set_ui(R, 0); mzd_slice_free_window(R); } mzp_free(P); mzp_free(Q); return r; } libm4rie-20130416/src/echelonform.h000066400000000000000000000051751212302364300167300ustar00rootroot00000000000000/** * \file echelonform.h * * \brief Echelon forms. * * \author Martin Albrecht */ #ifndef M4RIE_ECHELONFORM_H #define M4RIE_ECHELONFORM_H /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010,2011 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include #include #include /** * \brief Compute row echelon forms using PLE decomposition. * * Compute the (reduced) row echelon form of the matrix A. If full=0, * then return the reduced row echelon. This function reduces echelon * forms to PLE (or PLUQ) decomposition. * * \param A Matrix * \param full REF or RREF. * * \ingroup Echelon */ rci_t mzd_slice_echelonize_ple(mzd_slice_t *A, int full); /** * \brief Compute row echelon forms using PLE decomposition. 
* * Compute the (reduced) row echelon form of the matrix A. If full=0, * then return the reduced REF. This function reduces echelon forms to * PLE (or PLUQ) decomposition. * * \param A Matrix * \param full REF or RREF. * * \ingroup Echelon * * \note This function converts A to bitslice representation and * back. Hence, it uses more memory than using * mzed_echelonize_newton_john() or mzd_slice_echelonize_ple() */ static inline rci_t mzed_echelonize_ple(mzed_t *A, int full) { mzd_slice_t *a = mzed_slice(NULL, A); rci_t r = mzd_slice_echelonize_ple(a, full); mzed_cling(A, a); mzd_slice_free(a); return r; } /** * \brief Compute row echelon forms. * * Compute the (reduced) row echelon form of the matrix A. If full=0, * then return the reduced REF. * * \param A Matrix * \param full REF or RREF. * * \ingroup Echelon */ #define mzd_slice_echelonize mzd_slice_echelonize_ple /** * \brief Compute row echelon forms. * * Compute the (reduced) row echelon form of the matrix A. If full=0, * then return the reduced row echelon form. * * \param A Matrix * \param full REF or RREF. 
* * \ingroup Echelon */ rci_t mzed_echelonize(mzed_t *A, int full); #endif //M4RIE_ECHELONFORM_H libm4rie-20130416/src/gf2e.c000066400000000000000000002416211212302364300152430ustar00rootroot00000000000000#include #include "gf2e.h" gf2e *gf2e_init(const word minpoly) { gf2e *ff = (gf2e*)m4ri_mm_calloc(1, sizeof(gf2e)); for(int i=0; i<=M4RIE_MAX_DEGREE; i++) if(1<degree = i; ff->minpoly = minpoly; const unsigned int order = __M4RI_TWOPOW(ff->degree); /** red **/ ff->red = (word*)m4ri_mm_calloc(order, sizeof(word)); for(unsigned int i=1; idegree; j++) if (__M4RI_TWOPOW(j) & i) tmp ^= minpoly<red[tmp>>ff->degree] == 0); ff->red[tmp>>ff->degree] = tmp; } /** pow_gen: X^i **/ unsigned int n = 2*ff->degree-1; ff->pow_gen = (word*)m4ri_mm_malloc( n * sizeof(word)); for(unsigned int i=0; ipow_gen[i] = 1<=ff->degree; j--) { if (ff->pow_gen[i] & 1<pow_gen[i] ^= ff->minpoly<<(j - ff->degree); } } if(ff->degree <= 8) { /** mul tables **/ ff->_mul = (word **)m4ri_mm_calloc(order, sizeof(word *)); ff->_mul[0] = (word *)m4ri_mm_calloc(order, sizeof(word)); for(unsigned int i = 1; i_mul[i] = (word *)m4ri_mm_calloc(order, sizeof(word)); for(unsigned int j=1; jdegree); ff->_mul[i][j] = res ^ ff->red[res>>ff->degree]; } } ff->mul = _gf2e_mul_table; } else { ff->mul = _gf2e_mul_arith; } ff->inv = gf2e_inv; return ff; } void gf2e_free(gf2e *ff) { if (ff->_mul) { for(size_t i=0; i<__M4RI_TWOPOW(ff->degree); i++) { m4ri_mm_free(ff->_mul[i]); } m4ri_mm_free(ff->_mul); } m4ri_mm_free(ff->pow_gen); m4ri_mm_free(ff->red); } const word _irreducible_polynomials_degree_02[ 2] = { 1, 0x00007 }; const word _irreducible_polynomials_degree_03[ 3] = { 2, 0x0000b, 0x0000d }; const word _irreducible_polynomials_degree_04[ 4] = { 3, 0x00013, 0x00019, 0x0001f }; const word _irreducible_polynomials_degree_05[ 7] = { 6, 0x00025, 0x00029, 0x0002f, 0x00037, 0x0003b, 0x0003d }; const word _irreducible_polynomials_degree_06[ 10] = { 9, 0x00043, 0x00049, 0x00057, 0x0005b, 0x00061, 0x00067, 0x0006d, 0x00073, 
0x00075 }; const word _irreducible_polynomials_degree_07[ 19] = { 18, 0x00083, 0x00089, 0x0008f, 0x00091, 0x0009d, 0x000a7, 0x000ab, 0x000b9, 0x000bf, 0x000c1, 0x000cb, 0x000d3, 0x000d5, 0x000e5, 0x000ef, 0x000f1, 0x000f7, 0x000fd }; const word _irreducible_polynomials_degree_08[ 31] = { 30, 0x0011b, 0x0011d, 0x0012b, 0x0012d, 0x00139, 0x0013f, 0x0014d, 0x0015f, 0x00163, 0x00165, 0x00169, 0x00171, 0x00177, 0x0017b, 0x00187, 0x0018b, 0x0018d, 0x0019f, 0x001a3, 0x001a9, 0x001b1, 0x001bd, 0x001c3, 0x001cf, 0x001d7, 0x001dd, 0x001e7, 0x001f3, 0x001f5, 0x001f9 }; const word _irreducible_polynomials_degree_09[ 57] = { 56, 0x00203, 0x00211, 0x00217, 0x0021b, 0x00221, 0x0022d, 0x00233, 0x0024b, 0x00259, 0x0025f, 0x00265, 0x00269, 0x0026f, 0x00277, 0x0027d, 0x00287, 0x00295, 0x00299, 0x002a3, 0x002a5, 0x002af, 0x002b7, 0x002bd, 0x002cf, 0x002d1, 0x002db, 0x002f5, 0x002f9, 0x00301, 0x00313, 0x00315, 0x0031f, 0x00323, 0x00331, 0x0033b, 0x00349, 0x0034f, 0x0035b, 0x00361, 0x0036b, 0x0036d, 0x00373, 0x0037f, 0x00385, 0x0038f, 0x003a1, 0x003b5, 0x003b9, 0x003c7, 0x003cb, 0x003cd, 0x003d5, 0x003d9, 0x003e3, 0x003e9, 0x003fb }; const word _irreducible_polynomials_degree_10[ 100] = { 99, 0x00409, 0x0040f, 0x0041b, 0x0041d, 0x00427, 0x0042d, 0x00435, 0x00447, 0x00453, 0x00463, 0x00465, 0x0046f, 0x00481, 0x0048b, 0x00499, 0x004a9, 0x004af, 0x004c5, 0x004c9, 0x004d7, 0x004e7, 0x004ed, 0x004f3, 0x004ff, 0x0050b, 0x0050d, 0x00519, 0x0051f, 0x00523, 0x00531, 0x0053d, 0x00543, 0x00557, 0x00561, 0x00567, 0x0056b, 0x00585, 0x0058f, 0x00597, 0x0059b, 0x005a1, 0x005ab, 0x005b9, 0x005c1, 0x005c7, 0x005e5, 0x005f7, 0x005fb, 0x00613, 0x00615, 0x00623, 0x00625, 0x00631, 0x00637, 0x00643, 0x0064f, 0x00651, 0x0065b, 0x00679, 0x0067f, 0x00685, 0x00689, 0x006a7, 0x006ad, 0x006b5, 0x006bf, 0x006c1, 0x006cd, 0x006d3, 0x006df, 0x006f7, 0x006fd, 0x0070f, 0x00711, 0x00717, 0x0071d, 0x00721, 0x0072b, 0x00735, 0x00739, 0x00747, 0x0074d, 0x00755, 0x00759, 0x00763, 0x0077b, 0x0077d, 0x00781, 0x00787, 0x0078d, 
0x00793, 0x007a9, 0x007b1, 0x007c5, 0x007db, 0x007eb, 0x007f3, 0x007f9, 0x007ff }; const word _irreducible_polynomials_degree_11[ 187] = { 186, 0x00805, 0x00817, 0x0082b, 0x0082d, 0x00847, 0x00863, 0x00865, 0x00871, 0x0087b, 0x0088d, 0x00895, 0x0089f, 0x008a9, 0x008b1, 0x008c3, 0x008cf, 0x008d1, 0x008e1, 0x008e7, 0x008eb, 0x008f5, 0x0090d, 0x00913, 0x00925, 0x00929, 0x00937, 0x0093b, 0x0093d, 0x00945, 0x00949, 0x00951, 0x0095b, 0x00973, 0x00975, 0x0097f, 0x00983, 0x0098f, 0x009ab, 0x009ad, 0x009b9, 0x009c7, 0x009d9, 0x009e5, 0x009ef, 0x009f7, 0x00a01, 0x00a07, 0x00a13, 0x00a15, 0x00a29, 0x00a49, 0x00a61, 0x00a6d, 0x00a79, 0x00a7f, 0x00a85, 0x00a91, 0x00a9d, 0x00aa7, 0x00aab, 0x00ab3, 0x00ab5, 0x00ad5, 0x00adf, 0x00ae3, 0x00ae9, 0x00aef, 0x00af1, 0x00afb, 0x00b03, 0x00b09, 0x00b11, 0x00b33, 0x00b3f, 0x00b41, 0x00b4b, 0x00b59, 0x00b5f, 0x00b65, 0x00b6f, 0x00b7d, 0x00b87, 0x00b8b, 0x00b93, 0x00b95, 0x00baf, 0x00bb7, 0x00bbd, 0x00bc9, 0x00bdb, 0x00bdd, 0x00be7, 0x00bed, 0x00c0b, 0x00c0d, 0x00c19, 0x00c1f, 0x00c31, 0x00c57, 0x00c61, 0x00c6b, 0x00c73, 0x00c75, 0x00c85, 0x00c89, 0x00c97, 0x00c9b, 0x00c9d, 0x00cb3, 0x00cbf, 0x00cc7, 0x00ccd, 0x00cd3, 0x00cd5, 0x00ce3, 0x00ce9, 0x00cf7, 0x00d03, 0x00d0f, 0x00d1d, 0x00d27, 0x00d2d, 0x00d41, 0x00d47, 0x00d55, 0x00d59, 0x00d63, 0x00d6f, 0x00d71, 0x00d93, 0x00d9f, 0x00da9, 0x00dbb, 0x00dbd, 0x00dc9, 0x00dd7, 0x00ddb, 0x00de1, 0x00de7, 0x00df5, 0x00dff, 0x00e05, 0x00e1d, 0x00e21, 0x00e27, 0x00e2b, 0x00e33, 0x00e39, 0x00e47, 0x00e4b, 0x00e55, 0x00e5f, 0x00e71, 0x00e7b, 0x00e7d, 0x00e81, 0x00e93, 0x00e9f, 0x00ea3, 0x00ebb, 0x00ec9, 0x00ecf, 0x00edd, 0x00ef3, 0x00ef9, 0x00f0b, 0x00f19, 0x00f31, 0x00f37, 0x00f5d, 0x00f6b, 0x00f6d, 0x00f75, 0x00f79, 0x00f83, 0x00f91, 0x00f97, 0x00f9b, 0x00fa7, 0x00fad, 0x00fb5, 0x00fcd, 0x00fd3, 0x00fe5, 0x00fe9, 0x00ffb }; const word _irreducible_polynomials_degree_12[ 336] = { 335, 0x01009, 0x01017, 0x01021, 0x01033, 0x01035, 0x0103f, 0x0104d, 0x01053, 0x01069, 0x01077, 0x0107b, 0x0107d, 0x01081, 
0x0108b, 0x01099, 0x010a3, 0x010a5, 0x010cf, 0x010d1, 0x010eb, 0x010ed, 0x010ff, 0x01107, 0x0111f, 0x01123, 0x01131, 0x01137, 0x0113b, 0x0114f, 0x01157, 0x01161, 0x0116b, 0x0116d, 0x01179, 0x01183, 0x01185, 0x01191, 0x011ab, 0x011b3, 0x011d9, 0x011df, 0x011e3, 0x011ef, 0x011f1, 0x01201, 0x0120d, 0x01213, 0x01219, 0x01225, 0x01237, 0x0123b, 0x0123d, 0x01243, 0x01245, 0x01267, 0x0126d, 0x01273, 0x0127f, 0x012b5, 0x012b9, 0x012c1, 0x012cb, 0x01309, 0x0130f, 0x0131b, 0x0131d, 0x01321, 0x01333, 0x01339, 0x0133f, 0x0134d, 0x01365, 0x01371, 0x01399, 0x0139f, 0x013a3, 0x013a9, 0x013af, 0x013bb, 0x013c3, 0x013d1, 0x013d7, 0x013dd, 0x013f3, 0x013f5, 0x01407, 0x01413, 0x01431, 0x01437, 0x01449, 0x0144f, 0x0145b, 0x0145d, 0x01467, 0x0146b, 0x01475, 0x0147f, 0x01489, 0x014a1, 0x014a7, 0x014ad, 0x014b5, 0x014cd, 0x014d3, 0x014d9, 0x014df, 0x014e5, 0x0150f, 0x0151d, 0x0154b, 0x0154d, 0x01581, 0x01593, 0x015a5, 0x015a9, 0x015bb, 0x015c5, 0x015d7, 0x015dd, 0x015eb, 0x015f9, 0x01603, 0x01609, 0x0161b, 0x0163f, 0x01641, 0x01647, 0x0164b, 0x01655, 0x01659, 0x01663, 0x01665, 0x0166f, 0x01693, 0x016a5, 0x016bd, 0x016c3, 0x016c9, 0x016d1, 0x016e1, 0x016e7, 0x016f3, 0x016ff, 0x0170b, 0x01715, 0x01719, 0x0173d, 0x01743, 0x01745, 0x0174f, 0x01757, 0x0175d, 0x01773, 0x01775, 0x01779, 0x01789, 0x0178f, 0x01797, 0x0179d, 0x017ad, 0x017b3, 0x017bf, 0x017c1, 0x017df, 0x017fb, 0x01807, 0x0180d, 0x0181f, 0x01823, 0x01831, 0x01837, 0x01849, 0x01857, 0x0185d, 0x0186d, 0x01879, 0x0187f, 0x01883, 0x01891, 0x01897, 0x018a1, 0x018b9, 0x018cb, 0x018cd, 0x018ef, 0x018f1, 0x018fb, 0x01905, 0x01909, 0x0191b, 0x0192d, 0x01935, 0x01941, 0x0194b, 0x0195f, 0x01965, 0x0196f, 0x0197b, 0x01981, 0x0198b, 0x01999, 0x019b1, 0x019b7, 0x019bd, 0x019c9, 0x019cf, 0x019dd, 0x019e7, 0x019ed, 0x019f9, 0x01a1b, 0x01a1d, 0x01a21, 0x01a2b, 0x01a33, 0x01a4d, 0x01a53, 0x01a55, 0x01a5f, 0x01a63, 0x01a69, 0x01a7b, 0x01a8b, 0x01ab1, 0x01ac5, 0x01ad1, 0x01ae1, 0x01ae7, 0x01aeb, 0x01af5, 0x01b0b, 0x01b0d, 0x01b13, 0x01b19, 0x01b1f, 
0x01b2f, 0x01b45, 0x01b57, 0x01b89, 0x01b8f, 0x01b91, 0x01ba7, 0x01bb5, 0x01bb9, 0x01bbf, 0x01bc1, 0x01bcb, 0x01bd3, 0x01be3, 0x01bfd, 0x01c03, 0x01c05, 0x01c0f, 0x01c11, 0x01c17, 0x01c27, 0x01c4d, 0x01c5f, 0x01c87, 0x01c9f, 0x01ca5, 0x01cb7, 0x01cbb, 0x01cc5, 0x01cc9, 0x01ccf, 0x01cd7, 0x01ceb, 0x01ced, 0x01cf3, 0x01d01, 0x01d07, 0x01d23, 0x01d2f, 0x01d3d, 0x01d43, 0x01d51, 0x01d5b, 0x01d5d, 0x01d67, 0x01d75, 0x01d79, 0x01d83, 0x01d85, 0x01d89, 0x01d91, 0x01da7, 0x01db3, 0x01dc1, 0x01def, 0x01e07, 0x01e15, 0x01e19, 0x01e2f, 0x01e3b, 0x01e3d, 0x01e45, 0x01e51, 0x01e5d, 0x01e61, 0x01e67, 0x01e73, 0x01e8f, 0x01e97, 0x01e9b, 0x01eb9, 0x01ebf, 0x01ecd, 0x01ed3, 0x01ee3, 0x01ef1, 0x01ef7, 0x01f03, 0x01f11, 0x01f1b, 0x01f27, 0x01f39, 0x01f47, 0x01f4b, 0x01f53, 0x01f65, 0x01f71, 0x01f7d, 0x01f81, 0x01f8d, 0x01f99, 0x01faf, 0x01fbb, 0x01fbd, 0x01fc3, 0x01fc5, 0x01fc9, 0x01fe1, 0x01fed, 0x01fff }; const word _irreducible_polynomials_degree_13[ 631] = { 630, 0x0201b, 0x02027, 0x02035, 0x02053, 0x02065, 0x0206f, 0x0208b, 0x0208d, 0x0209f, 0x020a5, 0x020af, 0x020bb, 0x020bd, 0x020c3, 0x020c9, 0x020e1, 0x020f3, 0x0210d, 0x02115, 0x02129, 0x0212f, 0x0213b, 0x02143, 0x02167, 0x0216b, 0x02179, 0x02189, 0x02197, 0x0219d, 0x021bf, 0x021c1, 0x021c7, 0x021cd, 0x021df, 0x021e3, 0x021f1, 0x021fb, 0x02219, 0x02225, 0x02237, 0x0223d, 0x02243, 0x0225b, 0x0225d, 0x02279, 0x0227f, 0x02289, 0x02297, 0x0229b, 0x022b3, 0x022bf, 0x022cd, 0x022ef, 0x022f7, 0x022fb, 0x02305, 0x02327, 0x0232b, 0x02347, 0x02355, 0x02359, 0x0236f, 0x02371, 0x0237d, 0x02387, 0x0238d, 0x02395, 0x023a3, 0x023a9, 0x023b1, 0x023b7, 0x023bb, 0x023e1, 0x023ed, 0x023f9, 0x0240b, 0x02413, 0x0241f, 0x02425, 0x02429, 0x0243d, 0x02451, 0x02457, 0x02461, 0x0246d, 0x0247f, 0x02483, 0x0249b, 0x0249d, 0x024b5, 0x024bf, 0x024c1, 0x024c7, 0x024cb, 0x024e3, 0x02509, 0x02517, 0x0251d, 0x02521, 0x0252d, 0x02539, 0x02553, 0x02555, 0x02563, 0x02571, 0x02577, 0x02587, 0x0258b, 0x02595, 0x02599, 0x0259f, 0x025af, 0x025bd, 0x025c5, 0x025cf, 
0x025d7, 0x025eb, 0x02603, 0x02605, 0x02611, 0x0262d, 0x0263f, 0x0264b, 0x02653, 0x02659, 0x02669, 0x02677, 0x0267b, 0x02687, 0x02693, 0x02699, 0x026b1, 0x026b7, 0x026bd, 0x026c3, 0x026eb, 0x026f5, 0x02713, 0x02729, 0x0273b, 0x0274f, 0x02757, 0x0275d, 0x0276b, 0x02773, 0x02779, 0x02783, 0x02791, 0x027a1, 0x027b9, 0x027c7, 0x027cb, 0x027df, 0x027ef, 0x027f1, 0x02807, 0x02819, 0x0281f, 0x02823, 0x02831, 0x0283b, 0x0283d, 0x02845, 0x02867, 0x02875, 0x02885, 0x028ab, 0x028ad, 0x028bf, 0x028cd, 0x028d5, 0x028df, 0x028e3, 0x028e9, 0x028fb, 0x02909, 0x0290f, 0x02911, 0x0291b, 0x0292b, 0x02935, 0x0293f, 0x02941, 0x0294b, 0x02955, 0x02977, 0x0297d, 0x02981, 0x02993, 0x0299f, 0x029af, 0x029b7, 0x029bd, 0x029c3, 0x029d7, 0x029f3, 0x029f5, 0x02a03, 0x02a0f, 0x02a1d, 0x02a21, 0x02a33, 0x02a35, 0x02a4d, 0x02a69, 0x02a6f, 0x02a71, 0x02a7b, 0x02a7d, 0x02aa5, 0x02aa9, 0x02ab1, 0x02ac5, 0x02ad7, 0x02adb, 0x02aeb, 0x02af3, 0x02b01, 0x02b15, 0x02b23, 0x02b25, 0x02b2f, 0x02b37, 0x02b43, 0x02b49, 0x02b6d, 0x02b7f, 0x02b85, 0x02b97, 0x02b9b, 0x02bad, 0x02bb3, 0x02bd9, 0x02be5, 0x02bfd, 0x02c0f, 0x02c21, 0x02c2b, 0x02c2d, 0x02c3f, 0x02c41, 0x02c4d, 0x02c71, 0x02c8b, 0x02c8d, 0x02c95, 0x02ca3, 0x02caf, 0x02cbd, 0x02cc5, 0x02cd1, 0x02cd7, 0x02ce1, 0x02ce7, 0x02ceb, 0x02d0d, 0x02d19, 0x02d29, 0x02d2f, 0x02d37, 0x02d3b, 0x02d45, 0x02d5b, 0x02d67, 0x02d75, 0x02d89, 0x02d8f, 0x02da7, 0x02dab, 0x02db5, 0x02de3, 0x02df1, 0x02dfd, 0x02e07, 0x02e13, 0x02e15, 0x02e29, 0x02e49, 0x02e4f, 0x02e5b, 0x02e5d, 0x02e61, 0x02e6b, 0x02e8f, 0x02e91, 0x02e97, 0x02e9d, 0x02eab, 0x02eb3, 0x02eb9, 0x02edf, 0x02efb, 0x02efd, 0x02f05, 0x02f09, 0x02f11, 0x02f17, 0x02f3f, 0x02f41, 0x02f4b, 0x02f4d, 0x02f59, 0x02f5f, 0x02f65, 0x02f69, 0x02f95, 0x02fa5, 0x02faf, 0x02fb1, 0x02fcf, 0x02fdd, 0x02fe7, 0x02fed, 0x02ff5, 0x02fff, 0x03007, 0x03015, 0x03019, 0x0302f, 0x03049, 0x0304f, 0x03067, 0x03079, 0x0307f, 0x03091, 0x030a1, 0x030b5, 0x030bf, 0x030c1, 0x030d3, 0x030d9, 0x030e5, 0x030ef, 0x03105, 0x0310f, 0x03135, 0x03147, 
0x0314d, 0x0315f, 0x03163, 0x03171, 0x0317b, 0x031a3, 0x031a9, 0x031b7, 0x031c5, 0x031c9, 0x031db, 0x031e1, 0x031eb, 0x031ed, 0x031f3, 0x031ff, 0x03209, 0x0320f, 0x0321d, 0x03227, 0x03239, 0x0324b, 0x03253, 0x03259, 0x03265, 0x03281, 0x03293, 0x03299, 0x0329f, 0x032a9, 0x032b7, 0x032bb, 0x032c3, 0x032d7, 0x032db, 0x032e7, 0x03307, 0x03315, 0x0332f, 0x03351, 0x0335d, 0x03375, 0x03397, 0x0339b, 0x033ab, 0x033b9, 0x033c1, 0x033c7, 0x033d5, 0x033e3, 0x033e5, 0x033f7, 0x033fb, 0x03409, 0x0341b, 0x03427, 0x03441, 0x0344d, 0x0345f, 0x03469, 0x03477, 0x0347b, 0x03487, 0x03493, 0x03499, 0x034a5, 0x034bd, 0x034c9, 0x034db, 0x034e7, 0x034f9, 0x0350d, 0x0351f, 0x03525, 0x03531, 0x03537, 0x03545, 0x0354f, 0x0355d, 0x0356d, 0x03573, 0x0357f, 0x0359d, 0x035a1, 0x035b9, 0x035cd, 0x035d5, 0x035d9, 0x035e3, 0x035e9, 0x035ef, 0x03601, 0x0360b, 0x0361f, 0x03625, 0x0362f, 0x0363b, 0x03649, 0x03651, 0x0365b, 0x03673, 0x03675, 0x03691, 0x0369b, 0x0369d, 0x036ad, 0x036cb, 0x036d3, 0x036d5, 0x036e3, 0x036ef, 0x03705, 0x0370f, 0x0371b, 0x03721, 0x0372d, 0x03739, 0x03741, 0x03747, 0x03753, 0x03771, 0x03777, 0x0378b, 0x03795, 0x03799, 0x037a3, 0x037c5, 0x037cf, 0x037d1, 0x037d7, 0x037dd, 0x037e1, 0x037f3, 0x03803, 0x03805, 0x03817, 0x0381d, 0x03827, 0x03833, 0x0384b, 0x03859, 0x03869, 0x03871, 0x038a3, 0x038b1, 0x038bb, 0x038c9, 0x038cf, 0x038e1, 0x038f3, 0x038f9, 0x03901, 0x03907, 0x0390b, 0x03913, 0x03931, 0x0394f, 0x03967, 0x0396d, 0x03983, 0x03985, 0x03997, 0x039a1, 0x039a7, 0x039ad, 0x039cb, 0x039cd, 0x039d3, 0x039ef, 0x039f7, 0x039fd, 0x03a07, 0x03a29, 0x03a2f, 0x03a3d, 0x03a51, 0x03a5d, 0x03a61, 0x03a67, 0x03a73, 0x03a75, 0x03a89, 0x03ab9, 0x03abf, 0x03acd, 0x03ad3, 0x03ad5, 0x03adf, 0x03ae5, 0x03ae9, 0x03afb, 0x03b11, 0x03b2b, 0x03b2d, 0x03b35, 0x03b3f, 0x03b53, 0x03b59, 0x03b63, 0x03b65, 0x03b6f, 0x03b71, 0x03b77, 0x03b8b, 0x03b99, 0x03ba5, 0x03ba9, 0x03bb7, 0x03bbb, 0x03bd1, 0x03be7, 0x03bf3, 0x03bff, 0x03c0d, 0x03c13, 0x03c15, 0x03c1f, 0x03c23, 0x03c25, 0x03c3b, 0x03c4f, 0x03c5d, 
0x03c6d, 0x03c83, 0x03c8f, 0x03c9d, 0x03ca7, 0x03cab, 0x03cb9, 0x03cc7, 0x03ce9, 0x03cfb, 0x03cfd, 0x03d03, 0x03d17, 0x03d1b, 0x03d21, 0x03d2d, 0x03d33, 0x03d35, 0x03d41, 0x03d4d, 0x03d65, 0x03d69, 0x03d7d, 0x03d81, 0x03d95, 0x03db1, 0x03db7, 0x03dc3, 0x03dd1, 0x03ddb, 0x03de7, 0x03deb, 0x03df9, 0x03e05, 0x03e09, 0x03e0f, 0x03e1b, 0x03e2b, 0x03e3f, 0x03e41, 0x03e53, 0x03e65, 0x03e69, 0x03e8b, 0x03ea3, 0x03ebd, 0x03ec5, 0x03ed7, 0x03edd, 0x03ee1, 0x03ef9, 0x03f0d, 0x03f19, 0x03f1f, 0x03f25, 0x03f37, 0x03f3d, 0x03f43, 0x03f45, 0x03f49, 0x03f51, 0x03f57, 0x03f61, 0x03f83, 0x03f89, 0x03f91, 0x03fab, 0x03fb5, 0x03fe3, 0x03ff7, 0x03ffd }; const word _irreducible_polynomials_degree_14[1162] = { 1161, 0x04021, 0x0402b, 0x04033, 0x04039, 0x0403f, 0x04053, 0x0405f, 0x04065, 0x0407b, 0x04087, 0x040a9, 0x040af, 0x040bb, 0x040bd, 0x040cf, 0x040d7, 0x040eb, 0x040f3, 0x040f9, 0x0410b, 0x0410d, 0x04113, 0x0413b, 0x04143, 0x04149, 0x04157, 0x04167, 0x0416d, 0x04191, 0x0419b, 0x0419d, 0x041a7, 0x041ab, 0x041ad, 0x041b5, 0x041d5, 0x041d9, 0x041f1, 0x04201, 0x0420d, 0x04245, 0x04249, 0x04257, 0x04261, 0x04273, 0x04279, 0x0427f, 0x04283, 0x04285, 0x0429d, 0x042a1, 0x042c7, 0x042cb, 0x042cd, 0x042e3, 0x042e9, 0x042ef, 0x04303, 0x04309, 0x0431b, 0x04321, 0x04327, 0x0433f, 0x04369, 0x0437b, 0x0437d, 0x04387, 0x04395, 0x043af, 0x043c9, 0x043dd, 0x043eb, 0x043ed, 0x043ff, 0x0440b, 0x0440d, 0x04443, 0x0445d, 0x04473, 0x04489, 0x044c1, 0x044d3, 0x044d5, 0x044df, 0x044e3, 0x044f1, 0x044fb, 0x04503, 0x04509, 0x0450f, 0x0452b, 0x04539, 0x04559, 0x0456f, 0x04577, 0x0457d, 0x0458d, 0x04599, 0x0459f, 0x045a5, 0x045b7, 0x045c5, 0x045c9, 0x045d1, 0x045d7, 0x045e7, 0x045f3, 0x045ff, 0x0460f, 0x0461d, 0x04627, 0x04635, 0x04647, 0x04659, 0x04663, 0x04671, 0x0467b, 0x0468d, 0x04693, 0x04695, 0x046a3, 0x046a5, 0x046c5, 0x046cf, 0x046db, 0x046eb, 0x046ff, 0x04725, 0x04731, 0x04743, 0x0474f, 0x0477f, 0x04789, 0x04791, 0x0479b, 0x047a7, 0x047b5, 0x047c1, 0x047d3, 0x047d9, 0x047e5, 0x047e9, 0x047ef, 0x047fd, 
0x04813, 0x04819, 0x0481f, 0x0483b, 0x04843, 0x04851, 0x0485b, 0x04861, 0x04867, 0x0487f, 0x04883, 0x04891, 0x048bf, 0x048c7, 0x048e3, 0x048f1, 0x048f7, 0x048fd, 0x0491d, 0x04921, 0x0492d, 0x04939, 0x04941, 0x04953, 0x0495f, 0x04965, 0x04969, 0x04977, 0x04987, 0x0498b, 0x049a9, 0x049cf, 0x049d1, 0x049dd, 0x049e1, 0x049e7, 0x04a17, 0x04a35, 0x04a59, 0x04a65, 0x04a7b, 0x04a81, 0x04a87, 0x04a8b, 0x04a93, 0x04aa3, 0x04aaf, 0x04abd, 0x04ac9, 0x04add, 0x04aed, 0x04af9, 0x04b1f, 0x04b37, 0x04b49, 0x04b5b, 0x04b61, 0x04b6b, 0x04b6d, 0x04b73, 0x04b79, 0x04b85, 0x04b8f, 0x04b97, 0x04ba1, 0x04bab, 0x04bb3, 0x04bb9, 0x04bcb, 0x04bcd, 0x04bd5, 0x04bdf, 0x04bf1, 0x04bfb, 0x04c09, 0x04c1b, 0x04c1d, 0x04c27, 0x04c2b, 0x04c3f, 0x04c47, 0x04c4b, 0x04c53, 0x04c55, 0x04c65, 0x04c7d, 0x04c8d, 0x04ca5, 0x04cbd, 0x04cc5, 0x04cd1, 0x04cd7, 0x04ced, 0x04cff, 0x04d15, 0x04d1f, 0x04d29, 0x04d31, 0x04d45, 0x04d4f, 0x04d51, 0x04d75, 0x04d85, 0x04d8f, 0x04dad, 0x04db3, 0x04dc1, 0x04dd3, 0x04df1, 0x04dfb, 0x04e01, 0x04e15, 0x04e23, 0x04e37, 0x04e43, 0x04e49, 0x04e51, 0x04e6b, 0x04e6d, 0x04e83, 0x04e9b, 0x04eb3, 0x04ed3, 0x04ee5, 0x04ee9, 0x04eef, 0x04f1d, 0x04f21, 0x04f4b, 0x04f55, 0x04f5f, 0x04f69, 0x04f6f, 0x04f81, 0x04f8d, 0x04f9f, 0x04fa5, 0x04fa9, 0x04fb7, 0x04fbb, 0x04fd7, 0x04feb, 0x04fed, 0x04ff9, 0x05007, 0x0501f, 0x05025, 0x0505b, 0x0505d, 0x050a1, 0x050cb, 0x050d5, 0x050d9, 0x050e3, 0x050e9, 0x050ef, 0x050fb, 0x0511b, 0x05121, 0x0512d, 0x0513f, 0x05153, 0x05159, 0x0515f, 0x0516f, 0x0517d, 0x0518b, 0x05199, 0x051a5, 0x051b1, 0x051b7, 0x051cf, 0x051d1, 0x051db, 0x051eb, 0x051ed, 0x051f5, 0x051ff, 0x05205, 0x0520f, 0x0522d, 0x05265, 0x05271, 0x0528b, 0x05299, 0x052a3, 0x052b1, 0x052bd, 0x052c3, 0x052c5, 0x052cf, 0x052d1, 0x052e7, 0x052eb, 0x052f9, 0x05301, 0x05319, 0x05325, 0x05329, 0x05337, 0x05349, 0x0535b, 0x05367, 0x05375, 0x0539b, 0x0539d, 0x053a7, 0x053b9, 0x053bf, 0x053c7, 0x053d3, 0x053e5, 0x053f1, 0x053fd, 0x05403, 0x0540f, 0x05417, 0x05435, 0x05439, 0x05447, 0x05459, 0x0545f, 
0x0547b, 0x0548d, 0x05493, 0x05495, 0x0549f, 0x054b1, 0x054b7, 0x054bb, 0x054dd, 0x054e1, 0x054eb, 0x05513, 0x05519, 0x0552f, 0x05537, 0x05567, 0x0556b, 0x05575, 0x05579, 0x0557f, 0x05583, 0x05585, 0x05591, 0x05597, 0x055ad, 0x055c1, 0x055d3, 0x055df, 0x055e9, 0x055f7, 0x05607, 0x0560d, 0x05615, 0x05623, 0x05629, 0x0562f, 0x05631, 0x05657, 0x0565d, 0x0569b, 0x056ab, 0x056ad, 0x056b3, 0x056c1, 0x056c7, 0x056cd, 0x056f1, 0x056f7, 0x056fb, 0x0572b, 0x0572d, 0x0573f, 0x0574b, 0x05755, 0x05759, 0x05763, 0x05765, 0x0577d, 0x05793, 0x057af, 0x057bd, 0x057c3, 0x057c5, 0x057d7, 0x057e7, 0x057f3, 0x05803, 0x05811, 0x05821, 0x0582d, 0x05833, 0x05835, 0x0583f, 0x05841, 0x0587d, 0x05887, 0x0588b, 0x05895, 0x05899, 0x058a3, 0x058af, 0x058b1, 0x058d1, 0x058dd, 0x058e7, 0x058eb, 0x058f9, 0x058ff, 0x05913, 0x05923, 0x0593b, 0x0594f, 0x0597f, 0x0598f, 0x05997, 0x0599d, 0x059a1, 0x059a7, 0x059ab, 0x059b5, 0x059cb, 0x059df, 0x059e9, 0x059fd, 0x05a0d, 0x05a13, 0x05a1f, 0x05a25, 0x05a3b, 0x05a3d, 0x05a43, 0x05a45, 0x05a49, 0x05a75, 0x05a8f, 0x05aa7, 0x05ab5, 0x05ac1, 0x05ad3, 0x05ad5, 0x05ad9, 0x05af7, 0x05b0f, 0x05b17, 0x05b1d, 0x05b2b, 0x05b33, 0x05b39, 0x05b41, 0x05b63, 0x05b69, 0x05b77, 0x05b99, 0x05ba3, 0x05ba9, 0x05bc3, 0x05bc5, 0x05be1, 0x05beb, 0x05bf9, 0x05c07, 0x05c0b, 0x05c19, 0x05c31, 0x05c37, 0x05c49, 0x05c5d, 0x05c6d, 0x05c79, 0x05c83, 0x05ca1, 0x05cbf, 0x05cc1, 0x05ccb, 0x05ccd, 0x05ce5, 0x05d05, 0x05d11, 0x05d1d, 0x05d33, 0x05d35, 0x05d47, 0x05d6f, 0x05d87, 0x05d8d, 0x05d95, 0x05d9f, 0x05da3, 0x05da9, 0x05dbb, 0x05dc9, 0x05dcf, 0x05ddb, 0x05de1, 0x05e03, 0x05e0f, 0x05e1b, 0x05e2d, 0x05e47, 0x05e4b, 0x05e63, 0x05e7d, 0x05e81, 0x05e99, 0x05ea5, 0x05ea9, 0x05ed7, 0x05edb, 0x05ee7, 0x05ef5, 0x05f07, 0x05f0d, 0x05f19, 0x05f1f, 0x05f23, 0x05f3d, 0x05f43, 0x05f45, 0x05f51, 0x05f61, 0x05f6b, 0x05f75, 0x05f89, 0x05f97, 0x05fab, 0x05fcd, 0x05fdf, 0x05fe3, 0x05fe5, 0x05ff1, 0x0600b, 0x0600d, 0x06015, 0x0602f, 0x0603d, 0x0604f, 0x06051, 0x06061, 0x06067, 0x0606b, 0x06089, 0x06097, 
0x0609d, 0x060a1, 0x060ab, 0x060b3, 0x060b9, 0x060d3, 0x060d5, 0x060f7, 0x06109, 0x06111, 0x0612d, 0x06139, 0x06141, 0x06153, 0x06171, 0x0617d, 0x0619f, 0x061a5, 0x061af, 0x061bb, 0x061db, 0x061ed, 0x061f5, 0x061ff, 0x0622b, 0x06233, 0x06235, 0x06239, 0x06247, 0x0624d, 0x06263, 0x0626f, 0x0627d, 0x0628d, 0x0629f, 0x062a5, 0x062a9, 0x062b1, 0x062b7, 0x062d7, 0x062db, 0x062dd, 0x062eb, 0x062ed, 0x062f3, 0x062ff, 0x06323, 0x06331, 0x06337, 0x0633d, 0x0635b, 0x0636d, 0x06375, 0x06385, 0x06389, 0x06391, 0x063a1, 0x063bf, 0x063ef, 0x063fd, 0x06409, 0x06417, 0x0642d, 0x06441, 0x06447, 0x0644d, 0x06455, 0x0648b, 0x06495, 0x0649f, 0x064a9, 0x064b1, 0x064e7, 0x064f5, 0x06501, 0x0650b, 0x06519, 0x06537, 0x0653b, 0x06543, 0x06545, 0x06549, 0x0655b, 0x06583, 0x06591, 0x065a7, 0x065ad, 0x065b9, 0x065bf, 0x065d3, 0x065d5, 0x065d9, 0x065e5, 0x065ef, 0x065f1, 0x065f7, 0x06601, 0x06607, 0x0660d, 0x06623, 0x0663b, 0x0664f, 0x0665d, 0x0666b, 0x0666d, 0x06673, 0x06683, 0x0668f, 0x06697, 0x066a7, 0x066ab, 0x066b5, 0x066b9, 0x066d9, 0x066df, 0x066e9, 0x06711, 0x0671b, 0x06721, 0x06733, 0x0674b, 0x0675f, 0x06769, 0x06777, 0x06781, 0x067a3, 0x067af, 0x067b7, 0x067d1, 0x067e7, 0x067eb, 0x067f5, 0x06803, 0x06811, 0x0681b, 0x0681d, 0x0682b, 0x06841, 0x06853, 0x0686f, 0x06877, 0x0688b, 0x0688d, 0x06893, 0x0689f, 0x068a5, 0x068a9, 0x068c5, 0x068c9, 0x068ff, 0x06907, 0x06919, 0x0692f, 0x0693d, 0x06957, 0x0696b, 0x06973, 0x06975, 0x06979, 0x06985, 0x0698f, 0x0699b, 0x0699d, 0x069a1, 0x069ab, 0x069c7, 0x069cd, 0x069e9, 0x06a01, 0x06a07, 0x06a0b, 0x06a19, 0x06a23, 0x06a2f, 0x06a51, 0x06a5b, 0x06a67, 0x06a6b, 0x06a6d, 0x06a75, 0x06a83, 0x06a97, 0x06ab3, 0x06ab5, 0x06abf, 0x06ac1, 0x06acb, 0x06acd, 0x06ae9, 0x06aef, 0x06afb, 0x06afd, 0x06b03, 0x06b17, 0x06b2b, 0x06b33, 0x06b39, 0x06b47, 0x06b4b, 0x06b55, 0x06b69, 0x06b7d, 0x06b81, 0x06b8d, 0x06b95, 0x06b9f, 0x06ba3, 0x06ba5, 0x06bb1, 0x06bb7, 0x06bc5, 0x06bd7, 0x06be1, 0x06bed, 0x06bf3, 0x06bf9, 0x06c0b, 0x06c19, 0x06c2f, 0x06c3d, 0x06c45, 0x06c57, 
0x06c61, 0x06c73, 0x06cb5, 0x06cb9, 0x06cbf, 0x06cc1, 0x06cc7, 0x06ccb, 0x06ce5, 0x06cf1, 0x06d05, 0x06d09, 0x06d0f, 0x06d17, 0x06d2b, 0x06d53, 0x06d63, 0x06d65, 0x06d69, 0x06d7b, 0x06da3, 0x06db1, 0x06dbb, 0x06dbd, 0x06dc3, 0x06dc5, 0x06ddd, 0x06e09, 0x06e27, 0x06e2d, 0x06e33, 0x06e41, 0x06e4d, 0x06e53, 0x06e6f, 0x06e7b, 0x06e81, 0x06e87, 0x06e95, 0x06eaf, 0x06eb7, 0x06ec3, 0x06edb, 0x06edd, 0x06ef9, 0x06f01, 0x06f15, 0x06f29, 0x06f31, 0x06f3b, 0x06f4f, 0x06f57, 0x06f5b, 0x06f61, 0x06f85, 0x06f91, 0x06f97, 0x06fab, 0x06fb5, 0x06fc7, 0x06fd9, 0x06fdf, 0x06fe9, 0x07005, 0x0701d, 0x0702b, 0x07033, 0x07035, 0x0704b, 0x0707d, 0x07081, 0x0708d, 0x070a9, 0x070b7, 0x070bb, 0x070c9, 0x070cf, 0x070dd, 0x070e1, 0x07113, 0x07115, 0x07119, 0x07123, 0x07131, 0x07137, 0x0713d, 0x0715d, 0x0716b, 0x07189, 0x07197, 0x0719b, 0x071a1, 0x071b5, 0x071bf, 0x071cb, 0x071df, 0x071e5, 0x071f7, 0x071fb, 0x07219, 0x0721f, 0x0722f, 0x07231, 0x0723b, 0x0724f, 0x07261, 0x07267, 0x0728f, 0x072ad, 0x072b3, 0x072c1, 0x072cd, 0x072d3, 0x072df, 0x072e5, 0x072f1, 0x072f7, 0x07303, 0x07309, 0x07327, 0x0732b, 0x07341, 0x07355, 0x07365, 0x0736f, 0x0738d, 0x07393, 0x073a5, 0x073bd, 0x073c9, 0x073d1, 0x073e7, 0x073f3, 0x073f5, 0x073ff, 0x07413, 0x07415, 0x07429, 0x0744f, 0x07457, 0x0745b, 0x0746b, 0x0746d, 0x07483, 0x074ab, 0x074b3, 0x074c7, 0x074cd, 0x074d5, 0x074df, 0x074e9, 0x074fb, 0x074fd, 0x07517, 0x0751b, 0x07521, 0x07535, 0x07541, 0x0754b, 0x07577, 0x0757b, 0x07581, 0x07599, 0x075a3, 0x075bd, 0x075cf, 0x075d1, 0x075eb, 0x075f5, 0x075f9, 0x075ff, 0x0761d, 0x07639, 0x07647, 0x07653, 0x07655, 0x0765f, 0x07663, 0x07665, 0x07669, 0x07687, 0x07695, 0x0769f, 0x076a3, 0x076af, 0x076bb, 0x076c5, 0x076d1, 0x076eb, 0x076f3, 0x076f9, 0x0770b, 0x0772f, 0x07749, 0x07751, 0x07757, 0x0776d, 0x07773, 0x07783, 0x07789, 0x077a7, 0x077ad, 0x077b5, 0x077c7, 0x077d3, 0x077d5, 0x077ef, 0x07815, 0x07825, 0x07831, 0x0783d, 0x0784f, 0x07851, 0x0785b, 0x0786d, 0x078a7, 0x078ad, 0x078b3, 0x078bf, 0x078cb, 0x078cd, 0x078d9, 
0x078df, 0x078e9, 0x078ef, 0x07903, 0x0790f, 0x07917, 0x07927, 0x07933, 0x0794d, 0x07959, 0x0796f, 0x07971, 0x0797b, 0x07981, 0x07987, 0x079a5, 0x079b1, 0x079c5, 0x079c9, 0x079d7, 0x079dd, 0x07a03, 0x07a1b, 0x07a27, 0x07a2b, 0x07a35, 0x07a3f, 0x07a4b, 0x07a55, 0x07a77, 0x07a81, 0x07a8d, 0x07aa9, 0x07ab7, 0x07abb, 0x07ac3, 0x07ae1, 0x07af3, 0x07af5, 0x07b0b, 0x07b23, 0x07b3b, 0x07b45, 0x07b4f, 0x07b51, 0x07b5d, 0x07b67, 0x07b79, 0x07b85, 0x07b8f, 0x07ba1, 0x07bab, 0x07bb9, 0x07bd3, 0x07be3, 0x07bf1, 0x07bf7, 0x07c05, 0x07c09, 0x07c27, 0x07c2d, 0x07c59, 0x07c69, 0x07c7d, 0x07c8b, 0x07c93, 0x07c95, 0x07ca3, 0x07cb7, 0x07cc3, 0x07cd1, 0x07cdd, 0x07ceb, 0x07cf9, 0x07cff, 0x07d01, 0x07d15, 0x07d37, 0x07d45, 0x07d49, 0x07d73, 0x07d79, 0x07d8f, 0x07d91, 0x07d97, 0x07da7, 0x07db3, 0x07dc7, 0x07dcd, 0x07dd5, 0x07de9, 0x07dfb, 0x07dfd, 0x07e01, 0x07e0d, 0x07e19, 0x07e2f, 0x07e45, 0x07e61, 0x07e75, 0x07e7f, 0x07e89, 0x07e8f, 0x07e9b, 0x07e9d, 0x07eab, 0x07ebf, 0x07ec7, 0x07ed3, 0x07ee3, 0x07ee5, 0x07f09, 0x07f21, 0x07f3f, 0x07f4d, 0x07f55, 0x07f71, 0x07f8b, 0x07f8d, 0x07f99, 0x07f9f, 0x07fa3, 0x07fb1, 0x07fc3, 0x07fc5, 0x07fd1, 0x07fd7, 0x07fe1, 0x07fe7 }; const word _irreducible_polynomials_degree_15[2183] = { 2182, 0x08003, 0x08011, 0x08017, 0x0802d, 0x08035, 0x0805f, 0x0806f, 0x08077, 0x08081, 0x08087, 0x08093, 0x080a5, 0x080b1, 0x080bd, 0x080c3, 0x080cf, 0x080dd, 0x080e7, 0x080f5, 0x080ff, 0x08101, 0x08115, 0x08125, 0x08157, 0x0815d, 0x08161, 0x0816d, 0x08179, 0x08185, 0x081a1, 0x081a7, 0x081b9, 0x081cb, 0x081cd, 0x081d5, 0x081df, 0x081fd, 0x08213, 0x0823b, 0x08245, 0x0827f, 0x08289, 0x0828f, 0x0829b, 0x082cb, 0x082d9, 0x082e3, 0x082f1, 0x08317, 0x0831b, 0x0832b, 0x08333, 0x08347, 0x0834d, 0x0835f, 0x08363, 0x08369, 0x08371, 0x0838b, 0x08395, 0x08399, 0x083af, 0x083bb, 0x083bd, 0x083c5, 0x083d1, 0x083f5, 0x08419, 0x08423, 0x0842f, 0x08431, 0x08437, 0x08449, 0x08467, 0x0846d, 0x08479, 0x08483, 0x08485, 0x08497, 0x084a1, 0x084b5, 0x084df, 0x084ef, 0x084f7, 0x084fb, 0x084fd, 
0x0851d, 0x08521, 0x08527, 0x08533, 0x08547, 0x0854b, 0x0855f, 0x08571, 0x0857b, 0x08581, 0x0858d, 0x085a3, 0x085b1, 0x085b7, 0x085c5, 0x085c9, 0x085db, 0x085ed, 0x085f3, 0x08609, 0x08611, 0x0861d, 0x0862b, 0x08655, 0x08659, 0x08665, 0x0867d, 0x08681, 0x08693, 0x086a9, 0x086af, 0x086b7, 0x086c3, 0x086db, 0x086f3, 0x086ff, 0x0870b, 0x0870d, 0x08715, 0x0871f, 0x08729, 0x08731, 0x0876b, 0x08785, 0x08789, 0x0878f, 0x0879d, 0x087ab, 0x087ad, 0x087b3, 0x087bf, 0x087df, 0x087e9, 0x087f1, 0x087f7, 0x08801, 0x08807, 0x08825, 0x08837, 0x0883b, 0x08849, 0x0885d, 0x08861, 0x08873, 0x08879, 0x08883, 0x0888f, 0x08891, 0x088b3, 0x088b5, 0x088c7, 0x088d9, 0x08903, 0x08905, 0x08909, 0x08911, 0x08917, 0x08927, 0x08965, 0x0897d, 0x08995, 0x08999, 0x089af, 0x089cf, 0x089d1, 0x089db, 0x089dd, 0x089ed, 0x089ff, 0x08a17, 0x08a39, 0x08a55, 0x08a63, 0x08a65, 0x08a71, 0x08a7b, 0x08a87, 0x08a8b, 0x08a9f, 0x08aaf, 0x08ab1, 0x08add, 0x08b0b, 0x08b0d, 0x08b3b, 0x08b3d, 0x08b43, 0x08b6d, 0x08b73, 0x08b91, 0x08b9d, 0x08bc1, 0x08bcb, 0x08bd5, 0x08bd9, 0x08be5, 0x08bf1, 0x08c21, 0x08c27, 0x08c47, 0x08c55, 0x08c59, 0x08c63, 0x08c69, 0x08c87, 0x08c93, 0x08ca3, 0x08ca9, 0x08cbb, 0x08cc9, 0x08ce1, 0x08ceb, 0x08d01, 0x08d07, 0x08d0b, 0x08d25, 0x08d2f, 0x08d43, 0x08d51, 0x08d5b, 0x08d6d, 0x08d7f, 0x08d85, 0x08d89, 0x08d9b, 0x08da1, 0x08dbf, 0x08dcb, 0x08dcd, 0x08de5, 0x08de9, 0x08def, 0x08df7, 0x08e0b, 0x08e1f, 0x08e3b, 0x08e51, 0x08e57, 0x08e73, 0x08e75, 0x08e8f, 0x08e97, 0x08e9b, 0x08ea1, 0x08ebf, 0x08ec1, 0x08ed3, 0x08edf, 0x08ee5, 0x08efb, 0x08f0f, 0x08f27, 0x08f2b, 0x08f39, 0x08f41, 0x08f53, 0x08f55, 0x08f65, 0x08f77, 0x08f7d, 0x08fa5, 0x08fb7, 0x08fbd, 0x08fc9, 0x08fd1, 0x08fe1, 0x08fe7, 0x0900b, 0x09019, 0x0901f, 0x09025, 0x09037, 0x0903d, 0x09043, 0x09057, 0x09061, 0x0906d, 0x09075, 0x09089, 0x09091, 0x0909b, 0x0909d, 0x090ad, 0x090e3, 0x09105, 0x09109, 0x0910f, 0x09117, 0x0911b, 0x0911d, 0x09141, 0x0914d, 0x09177, 0x0917b, 0x09199, 0x091a3, 0x091a9, 0x091b1, 0x091bd, 0x091cf, 0x091e1, 0x09203, 
0x09211, 0x0921b, 0x09221, 0x09227, 0x09233, 0x0923f, 0x0924d, 0x09253, 0x0925f, 0x0927b, 0x09287, 0x09295, 0x092a3, 0x092b7, 0x092c9, 0x092dd, 0x092f5, 0x092f9, 0x09313, 0x09331, 0x09337, 0x09345, 0x09349, 0x09357, 0x09373, 0x0937f, 0x09383, 0x0939d, 0x093a1, 0x093b5, 0x093cb, 0x093df, 0x093ef, 0x093f1, 0x093fb, 0x0940f, 0x0942b, 0x0942d, 0x09435, 0x0944b, 0x09453, 0x09477, 0x0947d, 0x0948b, 0x09493, 0x09499, 0x094a5, 0x094a9, 0x094bb, 0x094c5, 0x094d7, 0x094db, 0x094e1, 0x094e7, 0x094f5, 0x094ff, 0x09507, 0x09515, 0x09529, 0x09531, 0x0953d, 0x09545, 0x09561, 0x0956b, 0x09589, 0x0959b, 0x095ad, 0x095cd, 0x095df, 0x095e3, 0x095e9, 0x09613, 0x09615, 0x0962f, 0x09631, 0x09637, 0x0963d, 0x09643, 0x0965d, 0x09667, 0x09679, 0x09697, 0x096ad, 0x096c1, 0x096cb, 0x096d3, 0x096df, 0x096ef, 0x096fb, 0x09705, 0x0972b, 0x09735, 0x09739, 0x0974d, 0x09763, 0x09795, 0x0979f, 0x097a3, 0x097a5, 0x097a9, 0x097b1, 0x097db, 0x097dd, 0x097e1, 0x097f3, 0x097ff, 0x09809, 0x09821, 0x0982d, 0x09847, 0x09895, 0x09899, 0x098af, 0x098bb, 0x098c3, 0x098c5, 0x098d7, 0x098dd, 0x098e7, 0x098f3, 0x098f9, 0x0990b, 0x09919, 0x09925, 0x09929, 0x0992f, 0x09937, 0x09943, 0x09945, 0x0995b, 0x0996d, 0x0997f, 0x09989, 0x09991, 0x0999b, 0x099b9, 0x099bf, 0x099c1, 0x099cb, 0x099d5, 0x099e3, 0x099e5, 0x09a25, 0x09a31, 0x09a4f, 0x09a57, 0x09a5b, 0x09a5d, 0x09a61, 0x09a6d, 0x09a79, 0x09a7f, 0x09a85, 0x09a97, 0x09a9d, 0x09aab, 0x09ab9, 0x09ac7, 0x09acb, 0x09ae3, 0x09afb, 0x09b11, 0x09b1b, 0x09b2d, 0x09b33, 0x09b41, 0x09b53, 0x09b5f, 0x09b6f, 0x09b77, 0x09b7d, 0x09b87, 0x09b8b, 0x09b93, 0x09bb7, 0x09bbd, 0x09bd1, 0x09beb, 0x09bf9, 0x09bff, 0x09c15, 0x09c1f, 0x09c2f, 0x09c3b, 0x09c3d, 0x09c51, 0x09c5d, 0x09c67, 0x09c73, 0x09c75, 0x09c79, 0x09c85, 0x09cbf, 0x09cc7, 0x09cd5, 0x09ce5, 0x09ce9, 0x09cf1, 0x09cf7, 0x09d03, 0x09d55, 0x09d59, 0x09d63, 0x09d77, 0x09d81, 0x09d8b, 0x09d99, 0x09d9f, 0x09da5, 0x09dbd, 0x09e03, 0x09e11, 0x09e17, 0x09e21, 0x09e27, 0x09e39, 0x09e47, 0x09e55, 0x09e59, 0x09e5f, 0x09e63, 0x09e69, 
0x09e81, 0x09e9f, 0x09ea3, 0x09eed, 0x09ef3, 0x09f0b, 0x09f13, 0x09f19, 0x09f23, 0x09f2f, 0x09f49, 0x09f4f, 0x09f67, 0x09f9b, 0x09fad, 0x09fb3, 0x09fb5, 0x09fcd, 0x09fd9, 0x09ff7, 0x0a00b, 0x0a015, 0x0a023, 0x0a025, 0x0a02f, 0x0a043, 0x0a05b, 0x0a05d, 0x0a067, 0x0a06b, 0x0a06d, 0x0a07f, 0x0a089, 0x0a091, 0x0a0a7, 0x0a0b5, 0x0a0cd, 0x0a0df, 0x0a0e9, 0x0a0fb, 0x0a103, 0x0a10f, 0x0a11d, 0x0a121, 0x0a127, 0x0a133, 0x0a135, 0x0a139, 0x0a147, 0x0a159, 0x0a16f, 0x0a17b, 0x0a181, 0x0a19f, 0x0a1b1, 0x0a1c5, 0x0a1cf, 0x0a1e1, 0x0a20f, 0x0a21d, 0x0a22d, 0x0a235, 0x0a241, 0x0a247, 0x0a255, 0x0a263, 0x0a26f, 0x0a299, 0x0a2a9, 0x0a2bb, 0x0a2bd, 0x0a2c5, 0x0a2c9, 0x0a2d7, 0x0a2db, 0x0a2ed, 0x0a307, 0x0a30b, 0x0a30d, 0x0a319, 0x0a31f, 0x0a323, 0x0a329, 0x0a345, 0x0a36b, 0x0a373, 0x0a385, 0x0a38f, 0x0a397, 0x0a3a1, 0x0a3b3, 0x0a3c1, 0x0a3cd, 0x0a3d5, 0x0a403, 0x0a405, 0x0a409, 0x0a411, 0x0a453, 0x0a459, 0x0a465, 0x0a481, 0x0a487, 0x0a48b, 0x0a495, 0x0a499, 0x0a4a5, 0x0a4af, 0x0a4b1, 0x0a4bd, 0x0a4c3, 0x0a4db, 0x0a4eb, 0x0a4ed, 0x0a501, 0x0a523, 0x0a525, 0x0a529, 0x0a54f, 0x0a573, 0x0a583, 0x0a5a7, 0x0a5b9, 0x0a5c7, 0x0a5cd, 0x0a5e9, 0x0a5f1, 0x0a5fb, 0x0a613, 0x0a625, 0x0a62f, 0x0a637, 0x0a63d, 0x0a651, 0x0a661, 0x0a673, 0x0a691, 0x0a69b, 0x0a69d, 0x0a6c7, 0x0a6f1, 0x0a6fd, 0x0a71b, 0x0a72b, 0x0a739, 0x0a75f, 0x0a771, 0x0a78d, 0x0a799, 0x0a7b1, 0x0a7bb, 0x0a7bd, 0x0a7cf, 0x0a7d1, 0x0a7dd, 0x0a7e7, 0x0a805, 0x0a82b, 0x0a833, 0x0a839, 0x0a84b, 0x0a84d, 0x0a855, 0x0a869, 0x0a86f, 0x0a87d, 0x0a881, 0x0a893, 0x0a89f, 0x0a8a3, 0x0a8a9, 0x0a8b7, 0x0a8cf, 0x0a8dd, 0x0a8e1, 0x0a8eb, 0x0a90b, 0x0a913, 0x0a919, 0x0a925, 0x0a949, 0x0a975, 0x0a991, 0x0a9a7, 0x0a9ad, 0x0a9bf, 0x0a9c1, 0x0a9e9, 0x0a9ef, 0x0a9f7, 0x0aa0d, 0x0aa15, 0x0aa2f, 0x0aa31, 0x0aa43, 0x0aa45, 0x0aa4f, 0x0aa51, 0x0aa61, 0x0aa79, 0x0aa9d, 0x0aaab, 0x0aaad, 0x0aab3, 0x0aab9, 0x0aad3, 0x0aadf, 0x0aaf1, 0x0ab03, 0x0ab0f, 0x0ab1d, 0x0ab35, 0x0ab39, 0x0ab47, 0x0ab63, 0x0ab7d, 0x0ab81, 0x0ab8b, 0x0ab99, 0x0abbb, 0x0abc5, 0x0abd1, 
0x0abd7, 0x0abdb, 0x0abe7, 0x0abf3, 0x0ac01, 0x0ac07, 0x0ac0d, 0x0ac13, 0x0ac29, 0x0ac37, 0x0ac3b, 0x0ac45, 0x0ac5b, 0x0ac6d, 0x0ac73, 0x0ac7f, 0x0ac85, 0x0ac8f, 0x0ac97, 0x0acc7, 0x0acd5, 0x0acdf, 0x0ace9, 0x0acfd, 0x0ad05, 0x0ad11, 0x0ad1b, 0x0ad21, 0x0ad4b, 0x0ad4d, 0x0ad6f, 0x0ada3, 0x0adaf, 0x0adc9, 0x0addd, 0x0ade7, 0x0adeb, 0x0adf9, 0x0ae09, 0x0ae1d, 0x0ae2d, 0x0ae39, 0x0ae3f, 0x0ae4d, 0x0ae5f, 0x0ae63, 0x0ae71, 0x0ae77, 0x0ae7b, 0x0ae8d, 0x0ae95, 0x0aea3, 0x0aebb, 0x0aed7, 0x0aedd, 0x0aee7, 0x0aef5, 0x0af01, 0x0af13, 0x0af1f, 0x0af29, 0x0af2f, 0x0af3b, 0x0af43, 0x0af49, 0x0af67, 0x0af75, 0x0af8f, 0x0af9d, 0x0afab, 0x0afc1, 0x0afd3, 0x0afe3, 0x0aff7, 0x0affd, 0x0b00f, 0x0b01b, 0x0b02d, 0x0b035, 0x0b03f, 0x0b055, 0x0b077, 0x0b08d, 0x0b093, 0x0b0af, 0x0b0c3, 0x0b0c5, 0x0b0d1, 0x0b0e1, 0x0b0f3, 0x0b107, 0x0b10b, 0x0b10d, 0x0b13d, 0x0b157, 0x0b15d, 0x0b175, 0x0b18f, 0x0b197, 0x0b19d, 0x0b1a1, 0x0b1c7, 0x0b1df, 0x0b1e5, 0x0b20b, 0x0b213, 0x0b215, 0x0b243, 0x0b249, 0x0b24f, 0x0b275, 0x0b27f, 0x0b289, 0x0b29b, 0x0b2b5, 0x0b2c1, 0x0b2df, 0x0b2e9, 0x0b2ef, 0x0b2f7, 0x0b305, 0x0b317, 0x0b327, 0x0b363, 0x0b37b, 0x0b381, 0x0b393, 0x0b3a5, 0x0b3a9, 0x0b3b1, 0x0b3b7, 0x0b3c5, 0x0b3d7, 0x0b3db, 0x0b3dd, 0x0b3f9, 0x0b401, 0x0b40d, 0x0b419, 0x0b41f, 0x0b429, 0x0b443, 0x0b445, 0x0b457, 0x0b46d, 0x0b475, 0x0b47f, 0x0b4a7, 0x0b4bf, 0x0b4d9, 0x0b4ef, 0x0b4f7, 0x0b509, 0x0b51d, 0x0b533, 0x0b53f, 0x0b547, 0x0b553, 0x0b555, 0x0b569, 0x0b577, 0x0b57b, 0x0b595, 0x0b5a9, 0x0b5bb, 0x0b5c3, 0x0b5d7, 0x0b5e1, 0x0b5e7, 0x0b5eb, 0x0b5f9, 0x0b605, 0x0b609, 0x0b617, 0x0b61b, 0x0b621, 0x0b627, 0x0b62d, 0x0b633, 0x0b635, 0x0b64b, 0x0b659, 0x0b67b, 0x0b681, 0x0b68b, 0x0b699, 0x0b6af, 0x0b6b1, 0x0b6d1, 0x0b6e7, 0x0b6eb, 0x0b707, 0x0b71f, 0x0b725, 0x0b745, 0x0b757, 0x0b75b, 0x0b76b, 0x0b779, 0x0b783, 0x0b791, 0x0b797, 0x0b79d, 0x0b7a1, 0x0b7b3, 0x0b7cb, 0x0b7fd, 0x0b81f, 0x0b845, 0x0b84f, 0x0b861, 0x0b875, 0x0b885, 0x0b889, 0x0b89d, 0x0b8a1, 0x0b8ad, 0x0b8bf, 0x0b8d3, 0x0b8d5, 0x0b8fb, 0x0b903, 
0x0b909, 0x0b91d, 0x0b927, 0x0b92b, 0x0b947, 0x0b955, 0x0b959, 0x0b965, 0x0b977, 0x0b98d, 0x0b9af, 0x0b9bb, 0x0b9c3, 0x0b9c9, 0x0b9d1, 0x0b9e1, 0x0b9ed, 0x0b9f3, 0x0b9f5, 0x0ba05, 0x0ba0f, 0x0ba11, 0x0ba2b, 0x0ba39, 0x0ba59, 0x0ba69, 0x0ba6f, 0x0ba81, 0x0ba87, 0x0ba8d, 0x0ba93, 0x0bab7, 0x0bad7, 0x0baf3, 0x0baff, 0x0bb01, 0x0bb07, 0x0bb15, 0x0bb19, 0x0bb23, 0x0bb3d, 0x0bb43, 0x0bb49, 0x0bb4f, 0x0bb51, 0x0bb5b, 0x0bb67, 0x0bb75, 0x0bb91, 0x0bbb5, 0x0bbbf, 0x0bbcb, 0x0bbcd, 0x0bbe5, 0x0bbe9, 0x0bc09, 0x0bc33, 0x0bc39, 0x0bc63, 0x0bc65, 0x0bc69, 0x0bc7b, 0x0bc8b, 0x0bc8d, 0x0bc9f, 0x0bca9, 0x0bcc3, 0x0bcd1, 0x0bcdd, 0x0bd01, 0x0bd13, 0x0bd25, 0x0bd2f, 0x0bd37, 0x0bd45, 0x0bd67, 0x0bd7f, 0x0bd83, 0x0bd89, 0x0bdb9, 0x0bdc1, 0x0bdd3, 0x0bdd9, 0x0bde5, 0x0bdf1, 0x0be07, 0x0be15, 0x0be23, 0x0be29, 0x0be37, 0x0be57, 0x0be5b, 0x0be61, 0x0be6b, 0x0be7f, 0x0be83, 0x0be91, 0x0beab, 0x0bed5, 0x0bed9, 0x0bef1, 0x0bf17, 0x0bf21, 0x0bf27, 0x0bf33, 0x0bf35, 0x0bf53, 0x0bf65, 0x0bf81, 0x0bf8b, 0x0bfa3, 0x0bfb7, 0x0bfc3, 0x0bfdb, 0x0bfed, 0x0bff5, 0x0bfff, 0x0c001, 0x0c007, 0x0c013, 0x0c01f, 0x0c025, 0x0c049, 0x0c073, 0x0c079, 0x0c085, 0x0c091, 0x0c097, 0x0c09d, 0x0c0a7, 0x0c0b9, 0x0c0cb, 0x0c0d5, 0x0c0e3, 0x0c0fb, 0x0c111, 0x0c121, 0x0c133, 0x0c13f, 0x0c14b, 0x0c15f, 0x0c16f, 0x0c177, 0x0c17b, 0x0c17d, 0x0c193, 0x0c1a5, 0x0c1bd, 0x0c1c3, 0x0c1c9, 0x0c1db, 0x0c1e7, 0x0c1ed, 0x0c205, 0x0c209, 0x0c217, 0x0c22d, 0x0c24d, 0x0c253, 0x0c255, 0x0c269, 0x0c299, 0x0c2b1, 0x0c2cf, 0x0c2d1, 0x0c2dd, 0x0c2f5, 0x0c301, 0x0c30d, 0x0c319, 0x0c325, 0x0c32f, 0x0c33d, 0x0c357, 0x0c35b, 0x0c361, 0x0c373, 0x0c383, 0x0c39b, 0x0c39d, 0x0c3ab, 0x0c3ad, 0x0c3bf, 0x0c3c7, 0x0c3e3, 0x0c3ef, 0x0c3fd, 0x0c405, 0x0c421, 0x0c433, 0x0c447, 0x0c44b, 0x0c45f, 0x0c46f, 0x0c47d, 0x0c493, 0x0c49f, 0x0c4a5, 0x0c4b7, 0x0c4c5, 0x0c4d7, 0x0c4dd, 0x0c4eb, 0x0c4f9, 0x0c4ff, 0x0c50b, 0x0c515, 0x0c52f, 0x0c531, 0x0c549, 0x0c55b, 0x0c575, 0x0c579, 0x0c589, 0x0c59b, 0x0c5a1, 0x0c5b3, 0x0c5b5, 0x0c5bf, 0x0c5cb, 0x0c5df, 0x0c5e9, 
0x0c5ef, 0x0c5fd, 0x0c62f, 0x0c631, 0x0c63d, 0x0c645, 0x0c651, 0x0c66b, 0x0c675, 0x0c679, 0x0c68f, 0x0c697, 0x0c6ab, 0x0c6b3, 0x0c6b9, 0x0c6c1, 0x0c6c7, 0x0c6cd, 0x0c6d5, 0x0c6e3, 0x0c6e9, 0x0c6fb, 0x0c703, 0x0c709, 0x0c741, 0x0c753, 0x0c759, 0x0c763, 0x0c777, 0x0c799, 0x0c79f, 0x0c7a9, 0x0c7c3, 0x0c7d7, 0x0c7db, 0x0c7f5, 0x0c803, 0x0c81b, 0x0c835, 0x0c841, 0x0c84d, 0x0c85f, 0x0c865, 0x0c869, 0x0c895, 0x0c8bd, 0x0c8c9, 0x0c8f5, 0x0c8f9, 0x0c8ff, 0x0c901, 0x0c90d, 0x0c915, 0x0c91f, 0x0c923, 0x0c929, 0x0c931, 0x0c937, 0x0c93b, 0x0c95b, 0x0c95d, 0x0c961, 0x0c97f, 0x0c983, 0x0c98f, 0x0c9ab, 0x0c9c7, 0x0c9cb, 0x0c9cd, 0x0c9d9, 0x0ca25, 0x0ca29, 0x0ca2f, 0x0ca43, 0x0ca49, 0x0ca67, 0x0ca8f, 0x0caad, 0x0cabf, 0x0cad9, 0x0cae3, 0x0caef, 0x0caf1, 0x0cafd, 0x0cb17, 0x0cb1b, 0x0cb1d, 0x0cb2b, 0x0cb3f, 0x0cb55, 0x0cb69, 0x0cb71, 0x0cb7b, 0x0cb87, 0x0cbbd, 0x0cbd7, 0x0cbe7, 0x0cbf5, 0x0cc07, 0x0cc15, 0x0cc23, 0x0cc3d, 0x0cc49, 0x0cc5b, 0x0cc6d, 0x0cc7f, 0x0cc83, 0x0cc85, 0x0cc9b, 0x0cca1, 0x0cca7, 0x0ccad, 0x0ccb3, 0x0ccc1, 0x0ccd9, 0x0ccf7, 0x0ccfb, 0x0ccfd, 0x0cd11, 0x0cd2b, 0x0cd33, 0x0cd55, 0x0cd63, 0x0cd6f, 0x0cd87, 0x0cd9f, 0x0cda3, 0x0cdb7, 0x0cdc5, 0x0cdcf, 0x0cdd7, 0x0cde1, 0x0cdeb, 0x0cded, 0x0cdf9, 0x0cdff, 0x0ce03, 0x0ce11, 0x0ce27, 0x0ce35, 0x0ce39, 0x0ce65, 0x0ce6f, 0x0ce71, 0x0ce9f, 0x0cea5, 0x0cebb, 0x0cec3, 0x0cec5, 0x0cec9, 0x0ced1, 0x0ced7, 0x0cef3, 0x0ceff, 0x0cf0d, 0x0cf19, 0x0cf1f, 0x0cf2f, 0x0cf5d, 0x0cf61, 0x0cf6b, 0x0cf73, 0x0cf79, 0x0cf8f, 0x0cf9b, 0x0cf9d, 0x0cfa1, 0x0cfab, 0x0cfc7, 0x0cfd5, 0x0cfe9, 0x0cff7, 0x0d005, 0x0d009, 0x0d03f, 0x0d047, 0x0d04d, 0x0d071, 0x0d077, 0x0d087, 0x0d08b, 0x0d08d, 0x0d095, 0x0d099, 0x0d0a3, 0x0d0b1, 0x0d0c5, 0x0d0cf, 0x0d0d1, 0x0d0d7, 0x0d0e1, 0x0d0f9, 0x0d10b, 0x0d125, 0x0d129, 0x0d12f, 0x0d13d, 0x0d151, 0x0d15b, 0x0d16d, 0x0d19b, 0x0d1b9, 0x0d1bf, 0x0d1c1, 0x0d1d5, 0x0d1d9, 0x0d1ef, 0x0d1fd, 0x0d207, 0x0d215, 0x0d21f, 0x0d223, 0x0d229, 0x0d237, 0x0d257, 0x0d26b, 0x0d26d, 0x0d283, 0x0d28f, 0x0d2a1, 0x0d2b5, 0x0d2cb, 
0x0d303, 0x0d317, 0x0d327, 0x0d341, 0x0d34b, 0x0d359, 0x0d35f, 0x0d369, 0x0d377, 0x0d37b, 0x0d381, 0x0d393, 0x0d399, 0x0d3a3, 0x0d3b1, 0x0d3c9, 0x0d3d1, 0x0d3dd, 0x0d3e7, 0x0d3ed, 0x0d415, 0x0d429, 0x0d43b, 0x0d45d, 0x0d461, 0x0d46b, 0x0d497, 0x0d49d, 0x0d4ab, 0x0d4b3, 0x0d4bf, 0x0d4c1, 0x0d4d3, 0x0d4e5, 0x0d4e9, 0x0d4f1, 0x0d50f, 0x0d51b, 0x0d527, 0x0d52b, 0x0d547, 0x0d555, 0x0d559, 0x0d563, 0x0d57d, 0x0d593, 0x0d5b7, 0x0d5c3, 0x0d5e1, 0x0d5e7, 0x0d5f3, 0x0d5f5, 0x0d605, 0x0d627, 0x0d62b, 0x0d64b, 0x0d65f, 0x0d663, 0x0d66f, 0x0d67d, 0x0d687, 0x0d6a9, 0x0d6b7, 0x0d6c5, 0x0d6d7, 0x0d6e1, 0x0d6ed, 0x0d6f3, 0x0d715, 0x0d723, 0x0d725, 0x0d731, 0x0d767, 0x0d76d, 0x0d77f, 0x0d7ad, 0x0d7b3, 0x0d7b5, 0x0d7bf, 0x0d7d9, 0x0d7f7, 0x0d7fb, 0x0d80d, 0x0d813, 0x0d849, 0x0d857, 0x0d86d, 0x0d889, 0x0d88f, 0x0d89b, 0x0d8a7, 0x0d8ab, 0x0d8b5, 0x0d8c1, 0x0d8c7, 0x0d8d3, 0x0d8d9, 0x0d8e5, 0x0d909, 0x0d91b, 0x0d933, 0x0d941, 0x0d947, 0x0d94d, 0x0d95f, 0x0d965, 0x0d971, 0x0d98b, 0x0d999, 0x0d99f, 0x0d9a3, 0x0d9a9, 0x0d9b1, 0x0d9c3, 0x0d9d7, 0x0d9f3, 0x0d9f9, 0x0da05, 0x0da17, 0x0da27, 0x0da33, 0x0da35, 0x0da3f, 0x0da59, 0x0da7d, 0x0da8b, 0x0da93, 0x0da99, 0x0daa3, 0x0dab1, 0x0dac3, 0x0dadd, 0x0daed, 0x0db07, 0x0db1f, 0x0db25, 0x0db29, 0x0db3b, 0x0db45, 0x0db4f, 0x0db61, 0x0db83, 0x0db91, 0x0dba1, 0x0dbc7, 0x0dbcd, 0x0dbd5, 0x0dbdf, 0x0dbe3, 0x0dbe9, 0x0dbef, 0x0dbfb, 0x0dbfd, 0x0dc11, 0x0dc2b, 0x0dc35, 0x0dc39, 0x0dc41, 0x0dc6f, 0x0dc71, 0x0dc93, 0x0dc9f, 0x0dcaf, 0x0dcd1, 0x0dcd7, 0x0dcdb, 0x0dcf5, 0x0dd19, 0x0dd29, 0x0dd31, 0x0dd45, 0x0dd57, 0x0dd67, 0x0dd73, 0x0dd75, 0x0dd9d, 0x0ddad, 0x0ddc1, 0x0ddd5, 0x0dde5, 0x0ddf7, 0x0ddfb, 0x0de07, 0x0de2f, 0x0de3d, 0x0de49, 0x0de4f, 0x0de51, 0x0de6d, 0x0de75, 0x0de83, 0x0de85, 0x0de89, 0x0dea1, 0x0dead, 0x0decb, 0x0decd, 0x0ded3, 0x0df03, 0x0df05, 0x0df17, 0x0df1d, 0x0df21, 0x0df33, 0x0df59, 0x0df63, 0x0df69, 0x0df6f, 0x0df71, 0x0dfa5, 0x0dfbb, 0x0dfc9, 0x0dfdb, 0x0dfeb, 0x0e003, 0x0e00f, 0x0e011, 0x0e033, 0x0e035, 0x0e047, 0x0e04b, 0x0e05f, 
0x0e07b, 0x0e07d, 0x0e08d, 0x0e09f, 0x0e0a9, 0x0e0b1, 0x0e0b7, 0x0e0c5, 0x0e0d7, 0x0e0db, 0x0e0dd, 0x0e0ed, 0x0e0ff, 0x0e101, 0x0e10b, 0x0e125, 0x0e131, 0x0e149, 0x0e14f, 0x0e151, 0x0e15d, 0x0e167, 0x0e16b, 0x0e17f, 0x0e1b3, 0x0e1d3, 0x0e1d9, 0x0e1ef, 0x0e207, 0x0e20b, 0x0e219, 0x0e223, 0x0e231, 0x0e245, 0x0e24f, 0x0e267, 0x0e279, 0x0e285, 0x0e28f, 0x0e29b, 0x0e29d, 0x0e2a1, 0x0e2ab, 0x0e2ad, 0x0e2bf, 0x0e2c1, 0x0e2d5, 0x0e30f, 0x0e311, 0x0e31b, 0x0e335, 0x0e339, 0x0e359, 0x0e363, 0x0e365, 0x0e377, 0x0e38d, 0x0e393, 0x0e39f, 0x0e3a5, 0x0e3af, 0x0e3b7, 0x0e3c3, 0x0e3db, 0x0e3f3, 0x0e3ff, 0x0e41f, 0x0e431, 0x0e449, 0x0e45b, 0x0e46b, 0x0e46d, 0x0e473, 0x0e479, 0x0e485, 0x0e491, 0x0e497, 0x0e49d, 0x0e4a1, 0x0e4ab, 0x0e4cb, 0x0e4cd, 0x0e4df, 0x0e4f1, 0x0e4fd, 0x0e503, 0x0e505, 0x0e517, 0x0e51b, 0x0e52d, 0x0e533, 0x0e581, 0x0e595, 0x0e5a5, 0x0e5af, 0x0e5e7, 0x0e605, 0x0e621, 0x0e639, 0x0e63f, 0x0e647, 0x0e653, 0x0e669, 0x0e687, 0x0e6bb, 0x0e6bd, 0x0e6d7, 0x0e6dd, 0x0e6eb, 0x0e6f5, 0x0e6f9, 0x0e701, 0x0e719, 0x0e729, 0x0e76d, 0x0e775, 0x0e783, 0x0e78f, 0x0e7a7, 0x0e7ab, 0x0e7ad, 0x0e7b5, 0x0e7cb, 0x0e7d3, 0x0e7d5, 0x0e7e5, 0x0e7f1, 0x0e7f7, 0x0e801, 0x0e82f, 0x0e843, 0x0e84f, 0x0e851, 0x0e85b, 0x0e86d, 0x0e879, 0x0e889, 0x0e891, 0x0e8a7, 0x0e8bf, 0x0e8c1, 0x0e8cb, 0x0e8cd, 0x0e8d3, 0x0e8fb, 0x0e8fd, 0x0e903, 0x0e90f, 0x0e921, 0x0e927, 0x0e92b, 0x0e935, 0x0e959, 0x0e95f, 0x0e963, 0x0e969, 0x0e971, 0x0e98d, 0x0e9b7, 0x0e9c5, 0x0e9cf, 0x0e9d7, 0x0e9ed, 0x0ea09, 0x0ea1b, 0x0ea2d, 0x0ea4b, 0x0ea59, 0x0ea71, 0x0ea7d, 0x0ea81, 0x0ea8d, 0x0eabb, 0x0eac3, 0x0eac9, 0x0ead7, 0x0eaed, 0x0eaff, 0x0eb07, 0x0eb0b, 0x0eb19, 0x0eb23, 0x0eb29, 0x0eb3b, 0x0eb45, 0x0eb57, 0x0eb5d, 0x0eb67, 0x0eb6b, 0x0eb73, 0x0eb75, 0x0eb97, 0x0eb9b, 0x0ebad, 0x0ebb3, 0x0ebcd, 0x0ebd3, 0x0ebd5, 0x0ebe3, 0x0ec09, 0x0ec11, 0x0ec21, 0x0ec35, 0x0ec4b, 0x0ec5f, 0x0ec65, 0x0ec69, 0x0ec7d, 0x0ec93, 0x0ec99, 0x0ecaf, 0x0ecb7, 0x0ecbd, 0x0ecc9, 0x0eccf, 0x0ecff, 0x0ed07, 0x0ed15, 0x0ed23, 0x0ed37, 0x0ed49, 0x0ed5d, 
0x0ed61, 0x0ed6b, 0x0ed97, 0x0eda1, 0x0edab, 0x0edb3, 0x0edc7, 0x0edcd, 0x0edd9, 0x0eddf, 0x0edef, 0x0edf1, 0x0edfd, 0x0ee01, 0x0ee0b, 0x0ee0d, 0x0ee1f, 0x0ee29, 0x0ee75, 0x0ee83, 0x0ee89, 0x0ee9d, 0x0eead, 0x0eeb9, 0x0eec7, 0x0eecb, 0x0eed9, 0x0eee3, 0x0eef1, 0x0eef7, 0x0ef0f, 0x0ef21, 0x0ef2d, 0x0ef33, 0x0ef39, 0x0ef4d, 0x0ef77, 0x0ef95, 0x0ef9f, 0x0efb1, 0x0efbb, 0x0efe1, 0x0efe7, 0x0efeb, 0x0eff3, 0x0eff5, 0x0eff9, 0x0f007, 0x0f00d, 0x0f01f, 0x0f029, 0x0f02f, 0x0f045, 0x0f05d, 0x0f085, 0x0f089, 0x0f097, 0x0f0ab, 0x0f0bf, 0x0f0c7, 0x0f0d5, 0x0f0df, 0x0f0f1, 0x0f0f7, 0x0f111, 0x0f11b, 0x0f135, 0x0f141, 0x0f147, 0x0f14b, 0x0f153, 0x0f163, 0x0f171, 0x0f18d, 0x0f193, 0x0f1c5, 0x0f1e1, 0x0f1e7, 0x0f1f3, 0x0f1f5, 0x0f1ff, 0x0f217, 0x0f21d, 0x0f247, 0x0f24d, 0x0f255, 0x0f259, 0x0f26f, 0x0f27b, 0x0f287, 0x0f29f, 0x0f2a5, 0x0f2db, 0x0f2dd, 0x0f2f9, 0x0f2ff, 0x0f301, 0x0f30b, 0x0f315, 0x0f32f, 0x0f337, 0x0f343, 0x0f385, 0x0f389, 0x0f391, 0x0f397, 0x0f3b3, 0x0f3df, 0x0f3e5, 0x0f405, 0x0f40f, 0x0f417, 0x0f421, 0x0f439, 0x0f453, 0x0f455, 0x0f463, 0x0f465, 0x0f469, 0x0f47b, 0x0f48b, 0x0f499, 0x0f4a3, 0x0f4b1, 0x0f4bd, 0x0f4c3, 0x0f4cf, 0x0f4f3, 0x0f4f5, 0x0f4f9, 0x0f50d, 0x0f519, 0x0f525, 0x0f537, 0x0f53b, 0x0f551, 0x0f561, 0x0f56d, 0x0f591, 0x0f59d, 0x0f5a7, 0x0f5b5, 0x0f5bf, 0x0f5c1, 0x0f5c7, 0x0f601, 0x0f615, 0x0f61f, 0x0f623, 0x0f63b, 0x0f645, 0x0f64f, 0x0f65d, 0x0f66b, 0x0f673, 0x0f683, 0x0f685, 0x0f6b3, 0x0f6b5, 0x0f6d9, 0x0f6ef, 0x0f6fb, 0x0f721, 0x0f72d, 0x0f73f, 0x0f74d, 0x0f753, 0x0f769, 0x0f76f, 0x0f787, 0x0f78b, 0x0f795, 0x0f7a3, 0x0f7b1, 0x0f7b7, 0x0f7c3, 0x0f7c9, 0x0f7db, 0x0f7ff, 0x0f803, 0x0f809, 0x0f80f, 0x0f81d, 0x0f827, 0x0f82d, 0x0f839, 0x0f83f, 0x0f84b, 0x0f86f, 0x0f871, 0x0f877, 0x0f893, 0x0f8c5, 0x0f8db, 0x0f8e1, 0x0f8ed, 0x0f8f3, 0x0f8f5, 0x0f907, 0x0f915, 0x0f923, 0x0f93b, 0x0f93d, 0x0f94f, 0x0f951, 0x0f973, 0x0f979, 0x0f985, 0x0f99b, 0x0f9b3, 0x0f9b9, 0x0f9c7, 0x0f9e3, 0x0f9e9, 0x0f9f7, 0x0fa01, 0x0fa07, 0x0fa13, 0x0fa23, 0x0fa37, 0x0fa49, 0x0fa6b, 
0x0fa75, 0x0fa79, 0x0fa7f, 0x0fa83, 0x0fa97, 0x0fa9b, 0x0faa1, 0x0fac1, 0x0facb, 0x0fad9, 0x0fadf, 0x0fae5, 0x0fb05, 0x0fb0f, 0x0fb21, 0x0fb27, 0x0fb35, 0x0fb4d, 0x0fb55, 0x0fb5f, 0x0fb69, 0x0fb71, 0x0fb81, 0x0fb8d, 0x0fba3, 0x0fba9, 0x0fbb7, 0x0fbc9, 0x0fbcf, 0x0fbdb, 0x0fbe1, 0x0fc0b, 0x0fc0d, 0x0fc1f, 0x0fc49, 0x0fc5b, 0x0fc67, 0x0fc75, 0x0fc83, 0x0fcad, 0x0fcd3, 0x0fcef, 0x0fd0f, 0x0fd17, 0x0fd1d, 0x0fd2b, 0x0fd2d, 0x0fd39, 0x0fd47, 0x0fd53, 0x0fd71, 0x0fd8b, 0x0fd95, 0x0fd99, 0x0fda3, 0x0fdaf, 0x0fdb1, 0x0fdc3, 0x0fddd, 0x0fde1, 0x0fdeb, 0x0fe05, 0x0fe2d, 0x0fe33, 0x0fe35, 0x0fe41, 0x0fe4d, 0x0fe59, 0x0fe5f, 0x0fe7d, 0x0fe87, 0x0fe93, 0x0fe99, 0x0feb1, 0x0febd, 0x0fec9, 0x0feeb, 0x0feff, 0x0ff01, 0x0ff07, 0x0ff13, 0x0ff23, 0x0ff29, 0x0ff37, 0x0ff4f, 0x0ff57, 0x0ff5d, 0x0ff61, 0x0ff73, 0x0ff7f, 0x0ff8f, 0x0ff91, 0x0ffb3, 0x0ffc7, 0x0ffd9, 0x0ffe9, 0x0ffef, 0x0fffd }; const word _irreducible_polynomials_degree_16[4081] = { 4080, 0x1002b, 0x1002d, 0x10039, 0x1003f, 0x10047, 0x10053, 0x1008d, 0x100bd, 0x100d7, 0x100f5, 0x10129, 0x1012f, 0x1013b, 0x1013d, 0x1014f, 0x1015d, 0x10173, 0x10175, 0x1018f, 0x10197, 0x101a1, 0x101ad, 0x101bf, 0x101c7, 0x101cd, 0x101d5, 0x101e9, 0x101f7, 0x10215, 0x10219, 0x10225, 0x1022f, 0x1025b, 0x1025d, 0x1026b, 0x1026d, 0x10275, 0x10285, 0x10291, 0x102a1, 0x102b3, 0x102cd, 0x102d3, 0x102e5, 0x102f7, 0x102fb, 0x10317, 0x1031d, 0x10335, 0x10339, 0x1034b, 0x1034d, 0x10369, 0x1036f, 0x10371, 0x10387, 0x1038d, 0x1039f, 0x103a3, 0x103bb, 0x103cf, 0x103dd, 0x103ed, 0x103f9, 0x1040b, 0x10425, 0x10429, 0x10457, 0x10461, 0x10467, 0x1046d, 0x10483, 0x10489, 0x10491, 0x104a7, 0x104ad, 0x104b5, 0x104bf, 0x104c1, 0x104fd, 0x10509, 0x10533, 0x10539, 0x10547, 0x10563, 0x10565, 0x10569, 0x10587, 0x105af, 0x105c3, 0x105cf, 0x105d1, 0x105dd, 0x105eb, 0x105f5, 0x105f9, 0x10641, 0x1064b, 0x10653, 0x1065f, 0x1067b, 0x1067d, 0x10687, 0x1068b, 0x106a3, 0x106af, 0x106c3, 0x106c9, 0x106f5, 0x106ff, 0x1076b, 0x1076d, 0x10779, 0x1077f, 0x10783, 0x1078f, 0x10797, 
0x107ef, 0x107f1, 0x107fb, 0x10807, 0x1080b, 0x1080d, 0x10815, 0x1083b, 0x10861, 0x10879, 0x10897, 0x1089d, 0x108ab, 0x108bf, 0x108d5, 0x108df, 0x108e3, 0x108e9, 0x108f1, 0x108fb, 0x10939, 0x10959, 0x10977, 0x1097d, 0x1098b, 0x109a5, 0x109af, 0x109c3, 0x109c5, 0x109e1, 0x109e7, 0x109ed, 0x109f3, 0x10a03, 0x10a11, 0x10a35, 0x10a39, 0x10a5f, 0x10a63, 0x10a71, 0x10a7d, 0x10a81, 0x10a9f, 0x10abb, 0x10ac5, 0x10acf, 0x10add, 0x10ae1, 0x10aff, 0x10b01, 0x10b13, 0x10b15, 0x10b51, 0x10b5d, 0x10b91, 0x10b97, 0x10bb3, 0x10bcd, 0x10bd3, 0x10be5, 0x10bef, 0x10bf7, 0x10bfd, 0x10c0f, 0x10c1d, 0x10c21, 0x10c2b, 0x10c41, 0x10c69, 0x10c71, 0x10c7b, 0x10c8d, 0x10c95, 0x10ca3, 0x10caf, 0x10cbd, 0x10cdd, 0x10ceb, 0x10cf3, 0x10d0d, 0x10d19, 0x10d1f, 0x10d43, 0x10d49, 0x10d83, 0x10d8f, 0x10d91, 0x10d9d, 0x10dad, 0x10db5, 0x10dd3, 0x10de3, 0x10de5, 0x10e23, 0x10e45, 0x10e73, 0x10e7f, 0x10e85, 0x10e91, 0x10e97, 0x10e9d, 0x10ea1, 0x10ea7, 0x10ec7, 0x10ecb, 0x10efd, 0x10f03, 0x10f05, 0x10f09, 0x10f1b, 0x10f21, 0x10f69, 0x10f77, 0x10f87, 0x10f8b, 0x10f95, 0x10f99, 0x10fb1, 0x10fbd, 0x10fcf, 0x10fdb, 0x10fe7, 0x10feb, 0x1100b, 0x11025, 0x1103d, 0x11043, 0x1104f, 0x1105d, 0x1107f, 0x11083, 0x11085, 0x11097, 0x1109b, 0x110a1, 0x110a7, 0x110b5, 0x1111d, 0x11127, 0x11139, 0x11147, 0x1114d, 0x1115f, 0x1116f, 0x111c9, 0x111db, 0x111dd, 0x111eb, 0x111ed, 0x111f3, 0x111f9, 0x11241, 0x11247, 0x11253, 0x11255, 0x11263, 0x11277, 0x11281, 0x1128b, 0x11293, 0x11299, 0x112a5, 0x112af, 0x112bd, 0x112c5, 0x112d7, 0x112db, 0x112e1, 0x112ed, 0x112f3, 0x112ff, 0x1133b, 0x1133d, 0x11351, 0x11357, 0x11361, 0x1136b, 0x11375, 0x11379, 0x1139d, 0x113a1, 0x113df, 0x113f7, 0x11409, 0x1141b, 0x11433, 0x11435, 0x1144b, 0x1144d, 0x11459, 0x1145f, 0x11487, 0x11493, 0x114af, 0x114b1, 0x114c9, 0x114d1, 0x114ed, 0x114f9, 0x11507, 0x11513, 0x1158f, 0x11591, 0x115a1, 0x115a7, 0x115ab, 0x115d5, 0x115d9, 0x115df, 0x115e3, 0x115fb, 0x1161f, 0x1162f, 0x11637, 0x1163d, 0x11643, 0x11645, 0x11649, 0x11651, 0x11675, 0x11683, 0x1169b, 
0x116ab, 0x116d5, 0x116d9, 0x116f7, 0x116fd, 0x1170f, 0x1172d, 0x11733, 0x1173f, 0x11741, 0x11747, 0x1174d, 0x11753, 0x11765, 0x11795, 0x11799, 0x117a3, 0x117a9, 0x117d7, 0x117eb, 0x117f5, 0x1182b, 0x11833, 0x11855, 0x11863, 0x1186f, 0x11887, 0x1188d, 0x11893, 0x118a9, 0x118b1, 0x118b7, 0x118bb, 0x118cf, 0x118db, 0x118f9, 0x118ff, 0x1190b, 0x11923, 0x11925, 0x11931, 0x11937, 0x1195b, 0x1196b, 0x1196d, 0x11975, 0x11979, 0x119b3, 0x119cb, 0x119cd, 0x119e5, 0x119fb, 0x119fd, 0x11a07, 0x11a0d, 0x11a23, 0x11a31, 0x11a43, 0x11a51, 0x11a57, 0x11a5d, 0x11a6b, 0x11a73, 0x11a79, 0x11a89, 0x11a9b, 0x11abf, 0x11ae3, 0x11ae5, 0x11b09, 0x11b1d, 0x11b27, 0x11b2b, 0x11b47, 0x11b4b, 0x11b55, 0x11b59, 0x11b65, 0x11b7d, 0x11b8b, 0x11b8d, 0x11b93, 0x11baf, 0x11bbb, 0x11bbd, 0x11bc3, 0x11bd7, 0x11be1, 0x11bff, 0x11c07, 0x11c13, 0x11c23, 0x11c29, 0x11c45, 0x11c4f, 0x11c57, 0x11c5d, 0x11c61, 0x11c7f, 0x11c85, 0x11c9d, 0x11ca1, 0x11cb5, 0x11ccb, 0x11ccd, 0x11cd9, 0x11ce9, 0x11cef, 0x11cf1, 0x11d17, 0x11d1b, 0x11d2d, 0x11d3f, 0x11d4b, 0x11d53, 0x11d59, 0x11d5f, 0x11d65, 0x11d69, 0x11d77, 0x11d81, 0x11d87, 0x11dc9, 0x11ded, 0x11dff, 0x11e21, 0x11e3f, 0x11e5f, 0x11e71, 0x11e7b, 0x11e99, 0x11e9f, 0x11ea9, 0x11ebd, 0x11ec3, 0x11ec5, 0x11ecf, 0x11edd, 0x11ee7, 0x11eeb, 0x11ef3, 0x11f13, 0x11f29, 0x11f2f, 0x11f3b, 0x11f3d, 0x11f57, 0x11f67, 0x11f6d, 0x11f73, 0x11f75, 0x11f83, 0x11f9b, 0x11f9d, 0x11fb9, 0x11fbf, 0x11fc1, 0x1200d, 0x1201f, 0x12025, 0x1203d, 0x12051, 0x12073, 0x1208f, 0x1209b, 0x120ab, 0x120b5, 0x120b9, 0x120c7, 0x120e3, 0x12105, 0x12109, 0x1211b, 0x12141, 0x12153, 0x12163, 0x12169, 0x1217b, 0x12187, 0x12195, 0x121a9, 0x121b1, 0x121c5, 0x121cf, 0x121e1, 0x121e7, 0x121f9, 0x12217, 0x12227, 0x12235, 0x12241, 0x12253, 0x12259, 0x1225f, 0x12269, 0x1227b, 0x1228b, 0x12293, 0x12295, 0x122a5, 0x122af, 0x122b1, 0x122cf, 0x122dd, 0x122e7, 0x122f9, 0x12315, 0x1231f, 0x12325, 0x12329, 0x12345, 0x12357, 0x1236d, 0x12383, 0x12397, 0x123a7, 0x123b9, 0x123e3, 0x123e9, 0x12403, 0x1240f, 0x1242d, 
0x12439, 0x1243f, 0x1244b, 0x12453, 0x1248d, 0x12499, 0x1249f, 0x124b7, 0x124c5, 0x124d1, 0x124e7, 0x124ff, 0x12515, 0x12523, 0x12529, 0x1253b, 0x12557, 0x1255d, 0x12561, 0x12573, 0x12579, 0x12585, 0x1259b, 0x125bf, 0x125cd, 0x125e5, 0x125f7, 0x12607, 0x12615, 0x12623, 0x1263d, 0x12651, 0x1265b, 0x1266d, 0x12675, 0x1267f, 0x12683, 0x1269b, 0x126ad, 0x126c1, 0x126e9, 0x126fb, 0x12711, 0x1272b, 0x12733, 0x12739, 0x12759, 0x12763, 0x12771, 0x12787, 0x127a5, 0x127c5, 0x127d7, 0x127ed, 0x127f3, 0x1281b, 0x1281d, 0x1282d, 0x12841, 0x12847, 0x12871, 0x1287b, 0x1289f, 0x128a3, 0x128a9, 0x128af, 0x128c5, 0x128f5, 0x12901, 0x1290b, 0x12919, 0x1292f, 0x1293b, 0x12943, 0x12945, 0x12949, 0x12973, 0x1297f, 0x12983, 0x12989, 0x1299d, 0x129ad, 0x129b9, 0x129c7, 0x129d5, 0x129f1, 0x129f7, 0x12a25, 0x12a29, 0x12a31, 0x12a3b, 0x12a7f, 0x12a85, 0x12a9d, 0x12aa7, 0x12acd, 0x12ad5, 0x12ad9, 0x12ae3, 0x12ae9, 0x12af1, 0x12b05, 0x12b09, 0x12b27, 0x12b33, 0x12b35, 0x12b53, 0x12b69, 0x12b6f, 0x12b7b, 0x12b7d, 0x12b93, 0x12b99, 0x12baf, 0x12bbd, 0x12bd1, 0x12bdb, 0x12bdd, 0x12bf9, 0x12c07, 0x12c0b, 0x12c1f, 0x12c3b, 0x12c3d, 0x12c43, 0x12c4f, 0x12c5d, 0x12c61, 0x12c6b, 0x12c73, 0x12c79, 0x12c89, 0x12cad, 0x12cb9, 0x12ccd, 0x12cd3, 0x12ce3, 0x12cf7, 0x12cfb, 0x12d09, 0x12d27, 0x12d2d, 0x12d35, 0x12d41, 0x12d63, 0x12d65, 0x12d71, 0x12d77, 0x12d81, 0x12d8b, 0x12d8d, 0x12d9f, 0x12da9, 0x12ddb, 0x12de1, 0x12e0f, 0x12e17, 0x12e21, 0x12e33, 0x12e35, 0x12e47, 0x12e4b, 0x12e4d, 0x12e5f, 0x12e6f, 0x12e71, 0x12e8d, 0x12ea5, 0x12ea9, 0x12ec5, 0x12ec9, 0x12eeb, 0x12ef9, 0x12eff, 0x12f01, 0x12f07, 0x12f13, 0x12f3d, 0x12f43, 0x12f5b, 0x12f67, 0x12f7f, 0x12f89, 0x12f97, 0x12f9b, 0x12fa7, 0x12fb5, 0x12fbf, 0x12fcb, 0x12fd3, 0x12fdf, 0x13005, 0x13017, 0x13027, 0x1304b, 0x13059, 0x13065, 0x13077, 0x1307b, 0x1307d, 0x13081, 0x13093, 0x130af, 0x130b7, 0x130c3, 0x130db, 0x130dd, 0x130ed, 0x130f5, 0x130f9, 0x13107, 0x1310d, 0x13125, 0x13129, 0x1315d, 0x13161, 0x13173, 0x131a7, 0x131ab, 0x131cd, 0x131d9, 0x131e3, 
0x131fb, 0x1322f, 0x13237, 0x1323d, 0x13249, 0x13257, 0x1325b, 0x13267, 0x13275, 0x13291, 0x1329d, 0x132ab, 0x132ad, 0x132b5, 0x132cb, 0x132d9, 0x132ef, 0x132f1, 0x13305, 0x1331b, 0x1332b, 0x13339, 0x13347, 0x13377, 0x1339f, 0x133a9, 0x133c5, 0x133d1, 0x133e1, 0x133eb, 0x133f3, 0x1340b, 0x13415, 0x13419, 0x13425, 0x13443, 0x13451, 0x1347f, 0x13489, 0x1349b, 0x134a7, 0x134bf, 0x134cd, 0x134fd, 0x13505, 0x1351d, 0x13521, 0x13533, 0x13547, 0x1354b, 0x13559, 0x13571, 0x1357d, 0x1359f, 0x135a3, 0x135b1, 0x135c9, 0x135d7, 0x135f3, 0x1360f, 0x1361b, 0x1361d, 0x13635, 0x1363f, 0x1366f, 0x13671, 0x1368d, 0x13699, 0x136a9, 0x136b7, 0x136c3, 0x136d1, 0x136d7, 0x136ed, 0x13707, 0x1370b, 0x13719, 0x13723, 0x13743, 0x1374f, 0x13751, 0x13775, 0x13779, 0x13797, 0x137ab, 0x137b9, 0x137cd, 0x137d5, 0x137e3, 0x13801, 0x1380d, 0x1381f, 0x13837, 0x13849, 0x1384f, 0x1385b, 0x1385d, 0x1386d, 0x1388f, 0x138a1, 0x138c7, 0x138cb, 0x138cd, 0x138df, 0x138fb, 0x13911, 0x13917, 0x13921, 0x13935, 0x1393f, 0x13941, 0x1394d, 0x13953, 0x1395f, 0x13965, 0x1396f, 0x13977, 0x13981, 0x13999, 0x139a3, 0x139b7, 0x139bd, 0x139c9, 0x139eb, 0x139ff, 0x13a09, 0x13a1b, 0x13a1d, 0x13a2d, 0x13a33, 0x13a4d, 0x13a55, 0x13a63, 0x13a65, 0x13a69, 0x13a77, 0x13a95, 0x13a9f, 0x13aaf, 0x13abb, 0x13abd, 0x13acf, 0x13af3, 0x13af9, 0x13b13, 0x13b29, 0x13b3b, 0x13b3d, 0x13b45, 0x13b5b, 0x13b6d, 0x13b73, 0x13b83, 0x13b89, 0x13b9d, 0x13bcb, 0x13bd3, 0x13bd9, 0x13be3, 0x13bef, 0x13bf1, 0x13c05, 0x13c21, 0x13c27, 0x13c47, 0x13c4b, 0x13c69, 0x13c6f, 0x13c93, 0x13cb1, 0x13cdb, 0x13cf3, 0x13cf9, 0x13cff, 0x13d07, 0x13d13, 0x13d23, 0x13d31, 0x13d3d, 0x13d43, 0x13d49, 0x13d67, 0x13d6b, 0x13d6d, 0x13d83, 0x13d91, 0x13db3, 0x13db5, 0x13dc1, 0x13dcd, 0x13dd9, 0x13def, 0x13dfb, 0x13e0b, 0x13e19, 0x13e1f, 0x13e31, 0x13e3b, 0x13e43, 0x13e45, 0x13e51, 0x13e57, 0x13e79, 0x13e85, 0x13e89, 0x13e8f, 0x13eb9, 0x13ed3, 0x13edf, 0x13ee9, 0x13efb, 0x13f03, 0x13f09, 0x13f11, 0x13f1d, 0x13f41, 0x13f81, 0x13f8b, 0x13f9f, 0x13fa5, 0x13fa9, 0x13fb7, 
0x13fc5, 0x13fdb, 0x13ff5, 0x14019, 0x14075, 0x14079, 0x14085, 0x1409b, 0x1409d, 0x140ad, 0x140cd, 0x140d3, 0x140e3, 0x140f7, 0x14103, 0x14109, 0x1413f, 0x1414b, 0x14153, 0x14159, 0x14163, 0x14193, 0x14199, 0x141a9, 0x141af, 0x141bb, 0x141bd, 0x141e1, 0x141e7, 0x141f3, 0x141ff, 0x14203, 0x14205, 0x14211, 0x1424b, 0x14255, 0x14271, 0x1427b, 0x1427d, 0x14281, 0x14287, 0x142a5, 0x142a9, 0x142b7, 0x142bb, 0x142cf, 0x142e1, 0x142ed, 0x142f9, 0x14307, 0x1430b, 0x14323, 0x14343, 0x14349, 0x1437f, 0x14383, 0x143a7, 0x143bf, 0x143ef, 0x143f7, 0x143fb, 0x1440f, 0x14447, 0x1444d, 0x14471, 0x14477, 0x1447b, 0x1448d, 0x14495, 0x1449f, 0x144a3, 0x144bb, 0x144c5, 0x144d1, 0x144e1, 0x144f9, 0x1450b, 0x1451f, 0x14529, 0x14537, 0x14543, 0x14573, 0x14589, 0x1458f, 0x1459b, 0x1459d, 0x145b5, 0x145b9, 0x145e5, 0x145f7, 0x145fd, 0x14623, 0x14629, 0x14637, 0x14645, 0x14649, 0x1465d, 0x14673, 0x1468f, 0x14691, 0x146a1, 0x146cb, 0x146d5, 0x146df, 0x146e9, 0x146f1, 0x146fd, 0x14709, 0x14717, 0x14721, 0x1472d, 0x1474b, 0x1475f, 0x1477d, 0x14793, 0x14795, 0x14799, 0x147af, 0x147b7, 0x147c3, 0x147c9, 0x147eb, 0x147f9, 0x14809, 0x14811, 0x1481d, 0x14827, 0x14835, 0x1483f, 0x14841, 0x14847, 0x14859, 0x14881, 0x14893, 0x148a9, 0x148c3, 0x148d7, 0x148db, 0x148f5, 0x148ff, 0x1490b, 0x14915, 0x14919, 0x1492f, 0x14931, 0x14943, 0x1495d, 0x14967, 0x14989, 0x1498f, 0x149df, 0x149e5, 0x149fb, 0x14a13, 0x14a15, 0x14a23, 0x14a2f, 0x14a67, 0x14a6d, 0x14a75, 0x14a85, 0x14a89, 0x14a91, 0x14aa7, 0x14aad, 0x14ab5, 0x14abf, 0x14acb, 0x14adf, 0x14ae9, 0x14af7, 0x14afb, 0x14b03, 0x14b0f, 0x14b21, 0x14b4d, 0x14b63, 0x14b7d, 0x14b87, 0x14b95, 0x14bb7, 0x14bbb, 0x14bc9, 0x14bcf, 0x14bdb, 0x14bdd, 0x14bf3, 0x14bf9, 0x14bff, 0x14c0d, 0x14c19, 0x14c1f, 0x14c67, 0x14c73, 0x14c83, 0x14c9b, 0x14ca7, 0x14cb9, 0x14cc7, 0x14ccb, 0x14cf7, 0x14cfd, 0x14d0f, 0x14d2d, 0x14d39, 0x14d41, 0x14d4d, 0x14d53, 0x14d5f, 0x14d69, 0x14d71, 0x14d7b, 0x14d8b, 0x14d8d, 0x14db1, 0x14dd1, 0x14de7, 0x14deb, 0x14df3, 0x14e17, 0x14e4b, 0x14e6f, 
0x14e77, 0x14e81, 0x14e87, 0x14ea3, 0x14eaf, 0x14eb1, 0x14ebd, 0x14edb, 0x14eeb, 0x14ef5, 0x14eff, 0x14f13, 0x14f25, 0x14f31, 0x14f45, 0x14f49, 0x14f4f, 0x14f57, 0x14f5b, 0x14f61, 0x14f6b, 0x14f6d, 0x14f8f, 0x14fa1, 0x14fc7, 0x14fcd, 0x14fe3, 0x14ffb, 0x14ffd, 0x15003, 0x1500f, 0x15021, 0x15027, 0x1502b, 0x15059, 0x15081, 0x1509f, 0x150a5, 0x150b7, 0x150bb, 0x150c9, 0x150d7, 0x150eb, 0x150ff, 0x15107, 0x15125, 0x15149, 0x1515d, 0x1516d, 0x15175, 0x15183, 0x15189, 0x151a1, 0x151a7, 0x151b3, 0x151b5, 0x151cd, 0x151d3, 0x151df, 0x1520b, 0x1521f, 0x1522f, 0x15245, 0x1525d, 0x15261, 0x1526b, 0x15289, 0x15297, 0x152b3, 0x152b9, 0x152bf, 0x152c7, 0x152d5, 0x152f7, 0x152fb, 0x15303, 0x15309, 0x1531d, 0x15327, 0x15363, 0x1536f, 0x15377, 0x1539f, 0x153a5, 0x153bd, 0x153c5, 0x153d1, 0x153db, 0x153e1, 0x153ed, 0x153f5, 0x1540d, 0x15413, 0x1541f, 0x15431, 0x15457, 0x1545b, 0x1547f, 0x15483, 0x15485, 0x15491, 0x15497, 0x154b5, 0x154b9, 0x154cb, 0x154cd, 0x154d5, 0x154df, 0x15527, 0x1552d, 0x1554b, 0x15593, 0x155af, 0x155b1, 0x155b7, 0x155cf, 0x155db, 0x155ed, 0x155f5, 0x1560f, 0x15617, 0x15621, 0x1562b, 0x1562d, 0x15647, 0x15653, 0x15655, 0x15687, 0x1568d, 0x15695, 0x1569f, 0x156a9, 0x156af, 0x156c5, 0x156cf, 0x156d1, 0x156f5, 0x15701, 0x1570d, 0x15729, 0x15751, 0x1575b, 0x15767, 0x15783, 0x1579d, 0x157ab, 0x157c7, 0x157d3, 0x157d9, 0x15807, 0x15825, 0x1583d, 0x15851, 0x1585b, 0x1586b, 0x15883, 0x15889, 0x158a1, 0x158b3, 0x158d3, 0x158d9, 0x158df, 0x158e9, 0x158ef, 0x158fb, 0x158fd, 0x1592b, 0x15935, 0x15939, 0x1594b, 0x1594d, 0x15963, 0x15969, 0x15981, 0x1598d, 0x15993, 0x1599f, 0x159a3, 0x159a9, 0x159b7, 0x159cf, 0x159d7, 0x159dd, 0x159eb, 0x159ff, 0x15a09, 0x15a0f, 0x15a11, 0x15a41, 0x15a4b, 0x15a55, 0x15a6f, 0x15a71, 0x15a7d, 0x15a99, 0x15aa5, 0x15ab7, 0x15ad7, 0x15adb, 0x15ae7, 0x15aed, 0x15af3, 0x15b0d, 0x15b15, 0x15b3b, 0x15b45, 0x15b4f, 0x15b57, 0x15b61, 0x15b79, 0x15bab, 0x15bad, 0x15bc7, 0x15bdf, 0x15be9, 0x15c05, 0x15c2d, 0x15c33, 0x15c4b, 0x15c81, 0x15c8b, 0x15c93, 
0x15c99, 0x15ca3, 0x15ca5, 0x15cc9, 0x15cd1, 0x15cdd, 0x15ceb, 0x15d01, 0x15d07, 0x15d0d, 0x15d15, 0x15d1f, 0x15d31, 0x15d37, 0x15d3b, 0x15d4f, 0x15d6b, 0x15d7f, 0x15d91, 0x15dab, 0x15dc7, 0x15dd9, 0x15ddf, 0x15df1, 0x15dfd, 0x15e01, 0x15e0b, 0x15e13, 0x15e19, 0x15e25, 0x15e29, 0x15e37, 0x15e57, 0x15e67, 0x15e73, 0x15e83, 0x15eab, 0x15ead, 0x15ec1, 0x15ecb, 0x15ed5, 0x15ee5, 0x15f2b, 0x15f3f, 0x15f41, 0x15f47, 0x15f55, 0x15f8d, 0x15f95, 0x15fa3, 0x15fbd, 0x15fd1, 0x15fdd, 0x15ff3, 0x15ff9, 0x16009, 0x16017, 0x16021, 0x1602b, 0x16039, 0x1604d, 0x16055, 0x16063, 0x16065, 0x16077, 0x1609f, 0x160a3, 0x160b1, 0x160dd, 0x1610b, 0x16119, 0x16137, 0x1613b, 0x1613d, 0x16143, 0x1615b, 0x16161, 0x16167, 0x16175, 0x1617f, 0x161ad, 0x161b5, 0x161c7, 0x161d3, 0x161d5, 0x161ef, 0x161fd, 0x16201, 0x16213, 0x16231, 0x1623b, 0x16245, 0x16249, 0x1624f, 0x1625d, 0x16261, 0x1626b, 0x16273, 0x1627f, 0x1628f, 0x16297, 0x1629d, 0x162ab, 0x162b3, 0x162d5, 0x162d9, 0x162e9, 0x162fd, 0x16327, 0x1632d, 0x16333, 0x16335, 0x1634d, 0x16353, 0x16365, 0x16369, 0x1637b, 0x16381, 0x1638b, 0x163a3, 0x163b1, 0x163ed, 0x163f5, 0x16407, 0x1640d, 0x1641f, 0x16423, 0x1643b, 0x16443, 0x16445, 0x16451, 0x1645d, 0x16497, 0x1649b, 0x164ab, 0x164b9, 0x164e9, 0x164f7, 0x16503, 0x1650f, 0x16511, 0x1651b, 0x1652b, 0x16535, 0x16539, 0x1653f, 0x16547, 0x1654b, 0x16565, 0x16577, 0x16581, 0x1658d, 0x16593, 0x165a5, 0x165bb, 0x165d1, 0x165db, 0x165e7, 0x165f3, 0x16603, 0x16605, 0x16617, 0x16627, 0x16639, 0x1664b, 0x16655, 0x16659, 0x16669, 0x16671, 0x16681, 0x166a9, 0x166bb, 0x166d7, 0x16701, 0x16707, 0x16715, 0x16719, 0x1672f, 0x16731, 0x16743, 0x16749, 0x1674f, 0x1676b, 0x16779, 0x167a1, 0x167bf, 0x167cb, 0x167d9, 0x167df, 0x167e5, 0x167f7, 0x16801, 0x1680b, 0x1681f, 0x16829, 0x1683b, 0x16849, 0x16875, 0x1687f, 0x1688f, 0x1689d, 0x168ad, 0x168b9, 0x168c7, 0x168cb, 0x168d3, 0x168d5, 0x16917, 0x1691b, 0x16933, 0x16947, 0x16955, 0x16965, 0x16969, 0x16971, 0x1698d, 0x1699f, 0x169af, 0x169bb, 0x169c5, 0x169cf, 0x169d1, 
0x169d7, 0x16a03, 0x16a05, 0x16a1b, 0x16a2d, 0x16a3f, 0x16a41, 0x16a53, 0x16a69, 0x16a6f, 0x16a77, 0x16a8b, 0x16a99, 0x16aa3, 0x16aa5, 0x16abd, 0x16ac3, 0x16ac9, 0x16acf, 0x16ae7, 0x16af5, 0x16b01, 0x16b0d, 0x16b23, 0x16b29, 0x16b57, 0x16b5b, 0x16b61, 0x16b6b, 0x16b8f, 0x16b9d, 0x16bab, 0x16bb3, 0x16bb5, 0x16bc7, 0x16bef, 0x16bfb, 0x16c03, 0x16c1d, 0x16c39, 0x16c41, 0x16c6f, 0x16c77, 0x16c81, 0x16c93, 0x16ca5, 0x16cb7, 0x16cc9, 0x16ce7, 0x16cf3, 0x16d15, 0x16d31, 0x16d3b, 0x16d43, 0x16d4f, 0x16d79, 0x16d89, 0x16d9d, 0x16db3, 0x16db9, 0x16dbf, 0x16dc1, 0x16dc7, 0x16de5, 0x16df1, 0x16e19, 0x16e2f, 0x16e3d, 0x16e51, 0x16e5d, 0x16e67, 0x16e7f, 0x16e85, 0x16e91, 0x16e97, 0x16e9b, 0x16eb5, 0x16ecb, 0x16ed9, 0x16eef, 0x16efd, 0x16f0f, 0x16f11, 0x16f21, 0x16f2b, 0x16f47, 0x16f55, 0x16f71, 0x16f81, 0x16f87, 0x16f8d, 0x16f95, 0x16fb7, 0x16fc9, 0x16fdb, 0x16fed, 0x16fff, 0x17025, 0x17029, 0x17057, 0x1705b, 0x17061, 0x1706b, 0x1706d, 0x17083, 0x1708f, 0x1709d, 0x170ab, 0x170b3, 0x170b9, 0x170d9, 0x170fd, 0x17103, 0x17111, 0x17117, 0x17127, 0x17133, 0x17159, 0x1715f, 0x1716f, 0x17177, 0x1717b, 0x17181, 0x17195, 0x171b1, 0x171bd, 0x171c3, 0x171dd, 0x171f9, 0x17205, 0x1720f, 0x1721b, 0x1721d, 0x17221, 0x1722d, 0x1725f, 0x17263, 0x17271, 0x17287, 0x1728b, 0x1728d, 0x17299, 0x172a3, 0x172a9, 0x172af, 0x172cf, 0x172dd, 0x172e1, 0x172e7, 0x17329, 0x1733d, 0x17343, 0x17345, 0x17361, 0x17367, 0x1736d, 0x17373, 0x1738f, 0x17391, 0x1739b, 0x173ad, 0x173b9, 0x173bf, 0x173d5, 0x173f1, 0x17403, 0x17411, 0x17439, 0x17447, 0x1744d, 0x17469, 0x1746f, 0x17471, 0x1747d, 0x17481, 0x17487, 0x1748d, 0x17495, 0x174b1, 0x174b7, 0x174bb, 0x174c5, 0x174d7, 0x174eb, 0x174ed, 0x17501, 0x17513, 0x17515, 0x17519, 0x17523, 0x17525, 0x17549, 0x1755d, 0x17583, 0x17597, 0x1759b, 0x175a1, 0x175a7, 0x175b3, 0x175cb, 0x175fd, 0x17607, 0x1760d, 0x17619, 0x1763b, 0x1763d, 0x17643, 0x17657, 0x17661, 0x17667, 0x17675, 0x17689, 0x1769d, 0x176a1, 0x176ab, 0x176c7, 0x176df, 0x176f1, 0x17711, 0x1771b, 0x1771d, 0x17735, 
0x1773f, 0x17741, 0x1774b, 0x17781, 0x1778b, 0x17793, 0x177a5, 0x177a9, 0x177b7, 0x177c3, 0x177f5, 0x17809, 0x17811, 0x17835, 0x1783f, 0x17853, 0x1785f, 0x17869, 0x17877, 0x17899, 0x178c3, 0x178c9, 0x178cf, 0x178d1, 0x178db, 0x178dd, 0x178e7, 0x178eb, 0x178ed, 0x17901, 0x17907, 0x1790d, 0x17973, 0x17979, 0x1797f, 0x17991, 0x17997, 0x1799d, 0x179ab, 0x179b9, 0x179e9, 0x179ef, 0x179f1, 0x17a01, 0x17a1f, 0x17a57, 0x17a5b, 0x17a61, 0x17a7f, 0x17a91, 0x17aa7, 0x17aab, 0x17aad, 0x17ab9, 0x17ae3, 0x17ae5, 0x17aef, 0x17af1, 0x17af7, 0x17b05, 0x17b1d, 0x17b33, 0x17b39, 0x17b63, 0x17b7b, 0x17b95, 0x17ba9, 0x17bb1, 0x17bbb, 0x17bbd, 0x17be1, 0x17be7, 0x17beb, 0x17bf3, 0x17bf5, 0x17c07, 0x17c19, 0x17c2f, 0x17c37, 0x17c4f, 0x17c5d, 0x17c7f, 0x17c85, 0x17ca1, 0x17cb5, 0x17cbf, 0x17cc1, 0x17ccb, 0x17cd3, 0x17d03, 0x17d21, 0x17d33, 0x17d47, 0x17d59, 0x17d63, 0x17d7b, 0x17d87, 0x17da5, 0x17da9, 0x17db1, 0x17dc5, 0x17dd7, 0x17ddb, 0x17e1d, 0x17e27, 0x17e2b, 0x17e35, 0x17e41, 0x17e4b, 0x17e59, 0x17e65, 0x17e8b, 0x17e8d, 0x17e93, 0x17eaf, 0x17ebb, 0x17ec3, 0x17ec5, 0x17ed1, 0x17ed7, 0x17ee1, 0x17ee7, 0x17eed, 0x17eff, 0x17f0b, 0x17f0d, 0x17f2f, 0x17f31, 0x17f45, 0x17f5d, 0x17f73, 0x17f75, 0x17fa1, 0x17fb3, 0x17fc7, 0x17fd3, 0x17fe3, 0x17fe5, 0x17ffb, 0x18013, 0x18015, 0x18049, 0x1805d, 0x1806d, 0x18085, 0x180a1, 0x180a7, 0x180ad, 0x180cd, 0x180f7, 0x18103, 0x18105, 0x18117, 0x1811d, 0x1812b, 0x1814d, 0x1816f, 0x1817d, 0x18195, 0x181a3, 0x181a5, 0x181b7, 0x181db, 0x181e1, 0x181f9, 0x18211, 0x1821b, 0x1821d, 0x18227, 0x18235, 0x18241, 0x18255, 0x18265, 0x18277, 0x1828b, 0x18293, 0x1829f, 0x182bb, 0x182c3, 0x182c9, 0x182d1, 0x182e7, 0x182f3, 0x182f5, 0x182ff, 0x18307, 0x18315, 0x18329, 0x1833b, 0x18357, 0x1835d, 0x18361, 0x1836b, 0x18379, 0x18385, 0x18389, 0x183ab, 0x183b3, 0x183b9, 0x183bf, 0x183c1, 0x183d5, 0x183df, 0x183f1, 0x183f7, 0x18411, 0x1841b, 0x18433, 0x1844d, 0x18459, 0x18469, 0x1846f, 0x18477, 0x18493, 0x184af, 0x184b1, 0x184b7, 0x184cf, 0x184d1, 0x184dd, 0x184f9, 0x1850d, 
0x18513, 0x18523, 0x18525, 0x18529, 0x18537, 0x18545, 0x1854f, 0x18561, 0x1856d, 0x18579, 0x1857f, 0x18585, 0x1859d, 0x185c7, 0x185cd, 0x185d9, 0x185df, 0x185e9, 0x185fb, 0x1860b, 0x18619, 0x18625, 0x18637, 0x1863d, 0x18657, 0x18683, 0x186ab, 0x186ad, 0x186c1, 0x186d9, 0x186f1, 0x186f7, 0x186fd, 0x1870f, 0x1871d, 0x18721, 0x18733, 0x18741, 0x18753, 0x18787, 0x187af, 0x187b1, 0x187c5, 0x187dd, 0x187f3, 0x1880f, 0x18833, 0x1884d, 0x1885f, 0x18863, 0x18871, 0x18887, 0x188a5, 0x188b1, 0x188c5, 0x188c9, 0x188d7, 0x188e1, 0x188eb, 0x18913, 0x1892f, 0x18931, 0x18943, 0x18949, 0x1895d, 0x18979, 0x1897f, 0x18985, 0x1899b, 0x189a7, 0x189ad, 0x189cb, 0x189d9, 0x189ef, 0x189fb, 0x18a0d, 0x18a29, 0x18a45, 0x18a4f, 0x18a61, 0x18a67, 0x18a6b, 0x18a73, 0x18a75, 0x18a9d, 0x18aad, 0x18ab3, 0x18ac1, 0x18ae3, 0x18ae5, 0x18b03, 0x18b35, 0x18b39, 0x18b47, 0x18b4b, 0x18b59, 0x18b63, 0x18b77, 0x18b81, 0x18b8d, 0x18b9f, 0x18bb7, 0x18bd1, 0x18bdb, 0x18bf5, 0x18c0d, 0x18c23, 0x18c31, 0x18c7f, 0x18c91, 0x18c97, 0x18c9d, 0x18ca1, 0x18cb9, 0x18ccb, 0x18ce3, 0x18cef, 0x18cfb, 0x18d05, 0x18d09, 0x18d1b, 0x18d27, 0x18d35, 0x18d41, 0x18d69, 0x18d7b, 0x18d7d, 0x18d95, 0x18da3, 0x18da5, 0x18db7, 0x18dbd, 0x18dc9, 0x18dcf, 0x18de7, 0x18e05, 0x18e09, 0x18e1b, 0x18e21, 0x18e3f, 0x18e47, 0x18e63, 0x18e69, 0x18e77, 0x18e87, 0x18ea3, 0x18ea9, 0x18eb1, 0x18ebb, 0x18ebd, 0x18ecf, 0x18f0b, 0x18f19, 0x18f1f, 0x18f2f, 0x18f51, 0x18f57, 0x18f5b, 0x18f61, 0x18f73, 0x18f89, 0x18f9b, 0x18fb9, 0x18fcb, 0x18fd3, 0x18fd9, 0x18fe5, 0x18fef, 0x18ffd, 0x19003, 0x1900f, 0x1901b, 0x19027, 0x19047, 0x1904b, 0x19055, 0x19071, 0x1908d, 0x190a5, 0x190af, 0x190b7, 0x190d7, 0x190e7, 0x190f5, 0x1910b, 0x1911f, 0x19123, 0x1913b, 0x19143, 0x1914f, 0x19151, 0x19157, 0x1915d, 0x1916b, 0x19179, 0x1919b, 0x191a1, 0x191ab, 0x191b9, 0x191d3, 0x191e5, 0x191e9, 0x191f1, 0x19219, 0x19225, 0x19231, 0x19243, 0x19251, 0x1926d, 0x19275, 0x19279, 0x19283, 0x19289, 0x19291, 0x192a7, 0x192bf, 0x192df, 0x192fd, 0x19305, 0x1930f, 0x19317, 0x19327, 
0x19335, 0x1934b, 0x1934d, 0x19355, 0x1937b, 0x1939f, 0x193a9, 0x193b1, 0x193c5, 0x193cf, 0x193db, 0x193dd, 0x193eb, 0x193f3, 0x193ff, 0x19401, 0x19437, 0x1943b, 0x1943d, 0x19449, 0x19457, 0x19473, 0x19489, 0x19491, 0x194a7, 0x194ab, 0x194ad, 0x194c1, 0x194d5, 0x194ef, 0x19505, 0x19509, 0x19517, 0x1951b, 0x1952b, 0x19539, 0x1953f, 0x19547, 0x1955f, 0x19565, 0x19571, 0x1957b, 0x19587, 0x1958b, 0x1958d, 0x195a9, 0x195c3, 0x195d1, 0x195f3, 0x19605, 0x1962d, 0x19635, 0x1965f, 0x19669, 0x19677, 0x1967d, 0x19681, 0x196b7, 0x196db, 0x196e7, 0x196eb, 0x196f9, 0x19707, 0x1970d, 0x19713, 0x19715, 0x1971f, 0x19737, 0x19761, 0x1977f, 0x19797, 0x197a1, 0x197a7, 0x197ab, 0x197b9, 0x197d5, 0x197e3, 0x197e9, 0x197fd, 0x1980b, 0x19823, 0x1982f, 0x19831, 0x19843, 0x19851, 0x19875, 0x1988f, 0x198b9, 0x198c7, 0x198e9, 0x198fb, 0x1990f, 0x1991d, 0x19927, 0x1992b, 0x1992d, 0x19941, 0x19947, 0x19959, 0x1997d, 0x1998b, 0x1998d, 0x199a9, 0x199bd, 0x199c3, 0x199c9, 0x199d1, 0x199e7, 0x199ff, 0x19a0f, 0x19a1d, 0x19a35, 0x19a6f, 0x19a81, 0x19a8d, 0x19a95, 0x19aa3, 0x19ab7, 0x19abb, 0x19ad7, 0x19b07, 0x19b15, 0x19b31, 0x19b37, 0x19b5b, 0x19b5d, 0x19b6d, 0x19b79, 0x19b7f, 0x19b83, 0x19b9b, 0x19ba1, 0x19bad, 0x19bcb, 0x19bdf, 0x19bef, 0x19bfd, 0x19c09, 0x19c17, 0x19c2b, 0x19c3f, 0x19c53, 0x19c65, 0x19c69, 0x19c8d, 0x19ca3, 0x19cb1, 0x19cc5, 0x19ce1, 0x19ceb, 0x19cf3, 0x19cf5, 0x19d01, 0x19d19, 0x19d29, 0x19d2f, 0x19d3d, 0x19d45, 0x19d49, 0x19d7f, 0x19d8f, 0x19d9d, 0x19db9, 0x19dcb, 0x19de3, 0x19def, 0x19df1, 0x19df7, 0x19dfd, 0x19e07, 0x19e4f, 0x19e57, 0x19e5b, 0x19e61, 0x19e6d, 0x19e73, 0x19e79, 0x19e83, 0x19e91, 0x19ea7, 0x19eb5, 0x19eb9, 0x19ec7, 0x19ecb, 0x19edf, 0x19eef, 0x19ef1, 0x19efb, 0x19f05, 0x19f11, 0x19f17, 0x19f21, 0x19f4d, 0x19f53, 0x19f59, 0x19f65, 0x19f6f, 0x19f77, 0x19f7b, 0x19f93, 0x19f99, 0x19fa5, 0x19fb7, 0x19fbd, 0x19fc3, 0x19fc9, 0x19ff5, 0x1a011, 0x1a021, 0x1a02d, 0x1a033, 0x1a041, 0x1a059, 0x1a069, 0x1a06f, 0x1a07b, 0x1a095, 0x1a0c3, 0x1a0cf, 0x1a0db, 0x1a0f5, 0x1a0f9, 
0x1a107, 0x1a10d, 0x1a113, 0x1a11f, 0x1a125, 0x1a129, 0x1a131, 0x1a137, 0x1a145, 0x1a185, 0x1a197, 0x1a1ab, 0x1a1d9, 0x1a1e3, 0x1a1fd, 0x1a23b, 0x1a275, 0x1a283, 0x1a289, 0x1a291, 0x1a29d, 0x1a2a7, 0x1a2ad, 0x1a2bf, 0x1a2c1, 0x1a2c7, 0x1a2fd, 0x1a31b, 0x1a321, 0x1a32b, 0x1a333, 0x1a347, 0x1a353, 0x1a35f, 0x1a365, 0x1a369, 0x1a377, 0x1a37b, 0x1a38b, 0x1a38d, 0x1a3b1, 0x1a3cf, 0x1a3dd, 0x1a3e1, 0x1a3f9, 0x1a413, 0x1a419, 0x1a449, 0x1a44f, 0x1a451, 0x1a467, 0x1a475, 0x1a479, 0x1a485, 0x1a4b5, 0x1a4bf, 0x1a4c1, 0x1a4cb, 0x1a4cd, 0x1a4e5, 0x1a4e9, 0x1a4f7, 0x1a4fd, 0x1a505, 0x1a517, 0x1a527, 0x1a52b, 0x1a535, 0x1a54d, 0x1a555, 0x1a559, 0x1a571, 0x1a581, 0x1a593, 0x1a5a3, 0x1a5b1, 0x1a5b7, 0x1a5bb, 0x1a5c5, 0x1a5cf, 0x1a5dd, 0x1a62d, 0x1a639, 0x1a63f, 0x1a64b, 0x1a655, 0x1a663, 0x1a665, 0x1a66f, 0x1a671, 0x1a67d, 0x1a699, 0x1a6a5, 0x1a6b7, 0x1a6c5, 0x1a6e1, 0x1a6ed, 0x1a6f3, 0x1a6f5, 0x1a71f, 0x1a723, 0x1a731, 0x1a757, 0x1a75d, 0x1a767, 0x1a773, 0x1a797, 0x1a7b3, 0x1a7b9, 0x1a7bf, 0x1a7cd, 0x1a7e3, 0x1a7e9, 0x1a801, 0x1a80d, 0x1a815, 0x1a831, 0x1a83b, 0x1a84f, 0x1a857, 0x1a861, 0x1a873, 0x1a8a7, 0x1a8d5, 0x1a8df, 0x1a8ef, 0x1a8fd, 0x1a903, 0x1a917, 0x1a91b, 0x1a933, 0x1a935, 0x1a94b, 0x1a94d, 0x1a953, 0x1a98b, 0x1a999, 0x1a9af, 0x1a9b1, 0x1a9c9, 0x1a9e7, 0x1a9ed, 0x1a9f5, 0x1aa09, 0x1aa0f, 0x1aa1b, 0x1aa1d, 0x1aa21, 0x1aa4d, 0x1aa53, 0x1aa77, 0x1aa7b, 0x1aa8d, 0x1aa99, 0x1aa9f, 0x1aabd, 0x1aac3, 0x1aad1, 0x1aad7, 0x1aadd, 0x1aaf5, 0x1ab0b, 0x1ab13, 0x1ab19, 0x1ab1f, 0x1ab3d, 0x1ab51, 0x1ab57, 0x1ab75, 0x1ab83, 0x1ab8f, 0x1aba7, 0x1abad, 0x1abb5, 0x1abbf, 0x1abd3, 0x1abd5, 0x1abd9, 0x1abef, 0x1ac1d, 0x1ac27, 0x1ac35, 0x1ac47, 0x1ac5f, 0x1ac69, 0x1ac81, 0x1ac8d, 0x1ac95, 0x1aca3, 0x1acb1, 0x1accf, 0x1acd7, 0x1ad13, 0x1ad2f, 0x1ad31, 0x1ad4f, 0x1ad5b, 0x1ad6b, 0x1ad75, 0x1ad79, 0x1ad83, 0x1ad91, 0x1ada7, 0x1adad, 0x1adc1, 0x1adcd, 0x1addf, 0x1ade5, 0x1adfb, 0x1ae15, 0x1ae23, 0x1ae3d, 0x1ae4f, 0x1ae5d, 0x1ae61, 0x1ae73, 0x1ae75, 0x1ae8f, 0x1ae97, 0x1ae9b, 0x1aebf, 0x1aed3, 
0x1aedf, 0x1aee5, 0x1aee9, 0x1aef1, 0x1aefb, 0x1af11, 0x1af35, 0x1af39, 0x1af41, 0x1af65, 0x1af93, 0x1af99, 0x1afaf, 0x1afbd, 0x1afc5, 0x1afd1, 0x1afd7, 0x1afe1, 0x1afff, 0x1b013, 0x1b029, 0x1b03b, 0x1b043, 0x1b051, 0x1b05b, 0x1b07f, 0x1b083, 0x1b09b, 0x1b09d, 0x1b0ab, 0x1b0ad, 0x1b0b9, 0x1b0c7, 0x1b0d9, 0x1b0e3, 0x1b0ef, 0x1b109, 0x1b12b, 0x1b12d, 0x1b14d, 0x1b153, 0x1b15f, 0x1b163, 0x1b171, 0x1b177, 0x1b18b, 0x1b199, 0x1b1af, 0x1b1bb, 0x1b1cf, 0x1b1dd, 0x1b1e1, 0x1b1e7, 0x1b205, 0x1b209, 0x1b211, 0x1b21b, 0x1b24d, 0x1b259, 0x1b265, 0x1b277, 0x1b27b, 0x1b287, 0x1b2b1, 0x1b2b7, 0x1b2c9, 0x1b2d1, 0x1b2db, 0x1b2eb, 0x1b2ed, 0x1b313, 0x1b323, 0x1b345, 0x1b349, 0x1b34f, 0x1b35d, 0x1b37f, 0x1b38f, 0x1b397, 0x1b39d, 0x1b3b3, 0x1b3c7, 0x1b3e3, 0x1b3e9, 0x1b3f1, 0x1b40f, 0x1b41b, 0x1b41d, 0x1b435, 0x1b439, 0x1b455, 0x1b46f, 0x1b481, 0x1b499, 0x1b4bd, 0x1b4c9, 0x1b4f3, 0x1b50d, 0x1b531, 0x1b53b, 0x1b56b, 0x1b597, 0x1b5ad, 0x1b5b3, 0x1b5b9, 0x1b5bf, 0x1b5c7, 0x1b5d5, 0x1b5e3, 0x1b5e5, 0x1b5e9, 0x1b607, 0x1b60b, 0x1b619, 0x1b625, 0x1b631, 0x1b63d, 0x1b64f, 0x1b679, 0x1b67f, 0x1b691, 0x1b69b, 0x1b6b5, 0x1b6bf, 0x1b6d3, 0x1b6e5, 0x1b6ef, 0x1b6f7, 0x1b703, 0x1b70f, 0x1b711, 0x1b727, 0x1b74d, 0x1b755, 0x1b769, 0x1b76f, 0x1b77d, 0x1b793, 0x1b795, 0x1b7a3, 0x1b7a5, 0x1b7a9, 0x1b7d7, 0x1b7db, 0x1b7e1, 0x1b7ed, 0x1b7f9, 0x1b81b, 0x1b821, 0x1b82b, 0x1b82d, 0x1b84d, 0x1b853, 0x1b85f, 0x1b869, 0x1b88b, 0x1b88d, 0x1b8a9, 0x1b8d7, 0x1b8dd, 0x1b8e7, 0x1b8f9, 0x1b901, 0x1b90d, 0x1b913, 0x1b91f, 0x1b929, 0x1b92f, 0x1b949, 0x1b95b, 0x1b96d, 0x1b975, 0x1b983, 0x1b991, 0x1b9b5, 0x1b9b9, 0x1b9c7, 0x1b9f1, 0x1b9f7, 0x1ba07, 0x1ba15, 0x1ba31, 0x1ba45, 0x1ba57, 0x1ba5d, 0x1ba83, 0x1ba85, 0x1ba97, 0x1baa1, 0x1bab3, 0x1bab9, 0x1bacd, 0x1bae3, 0x1bafd, 0x1bb05, 0x1bb1b, 0x1bb27, 0x1bb2d, 0x1bb3f, 0x1bb4b, 0x1bb4d, 0x1bb6f, 0x1bb81, 0x1bba5, 0x1bbb1, 0x1bbbd, 0x1bbcf, 0x1bc07, 0x1bc0b, 0x1bc19, 0x1bc29, 0x1bc45, 0x1bc61, 0x1bc67, 0x1bc85, 0x1bc89, 0x1bc9b, 0x1bcab, 0x1bcc1, 0x1bcc7, 0x1bcf1, 0x1bcf7, 
0x1bd09, 0x1bd1d, 0x1bd3f, 0x1bd53, 0x1bd63, 0x1bd65, 0x1bd7d, 0x1bd8b, 0x1bd8d, 0x1bd93, 0x1bda9, 0x1bdaf, 0x1bdbd, 0x1bdd7, 0x1bdf3, 0x1be0f, 0x1be17, 0x1be21, 0x1be33, 0x1be35, 0x1be39, 0x1be5f, 0x1be63, 0x1be69, 0x1be6f, 0x1be81, 0x1be95, 0x1bea5, 0x1bec9, 0x1beeb, 0x1bef3, 0x1bef9, 0x1beff, 0x1bf19, 0x1bf23, 0x1bf25, 0x1bf2f, 0x1bf31, 0x1bf37, 0x1bf43, 0x1bf4f, 0x1bf51, 0x1bf67, 0x1bf6b, 0x1bf79, 0x1bf7f, 0x1bf85, 0x1bfa7, 0x1bfad, 0x1bfc1, 0x1bfdf, 0x1bfe5, 0x1bffd, 0x1c017, 0x1c021, 0x1c027, 0x1c035, 0x1c04d, 0x1c069, 0x1c071, 0x1c07b, 0x1c07d, 0x1c087, 0x1c09f, 0x1c0b1, 0x1c0bb, 0x1c0c9, 0x1c0cf, 0x1c0db, 0x1c0dd, 0x1c0f3, 0x1c107, 0x1c10b, 0x1c115, 0x1c119, 0x1c137, 0x1c13d, 0x1c151, 0x1c175, 0x1c179, 0x1c17f, 0x1c183, 0x1c185, 0x1c1b3, 0x1c1cd, 0x1c1d3, 0x1c1d9, 0x1c1df, 0x1c1e9, 0x1c207, 0x1c21f, 0x1c223, 0x1c231, 0x1c24f, 0x1c251, 0x1c25d, 0x1c267, 0x1c285, 0x1c29b, 0x1c29d, 0x1c2c1, 0x1c2d5, 0x1c2df, 0x1c2e3, 0x1c2e5, 0x1c309, 0x1c341, 0x1c353, 0x1c371, 0x1c37d, 0x1c381, 0x1c39f, 0x1c3a5, 0x1c3b7, 0x1c3c3, 0x1c3c9, 0x1c3d7, 0x1c3e1, 0x1c3ed, 0x1c401, 0x1c413, 0x1c425, 0x1c429, 0x1c445, 0x1c45d, 0x1c46b, 0x1c479, 0x1c47f, 0x1c491, 0x1c4a7, 0x1c4d5, 0x1c4e3, 0x1c4e9, 0x1c511, 0x1c527, 0x1c52d, 0x1c533, 0x1c541, 0x1c54d, 0x1c553, 0x1c559, 0x1c577, 0x1c57d, 0x1c58b, 0x1c599, 0x1c5a3, 0x1c5b1, 0x1c5b7, 0x1c5d1, 0x1c5ed, 0x1c5f5, 0x1c609, 0x1c61b, 0x1c62d, 0x1c633, 0x1c639, 0x1c665, 0x1c67b, 0x1c68b, 0x1c695, 0x1c6af, 0x1c6dd, 0x1c6e1, 0x1c6f3, 0x1c701, 0x1c70d, 0x1c729, 0x1c73b, 0x1c743, 0x1c75b, 0x1c767, 0x1c76d, 0x1c775, 0x1c78f, 0x1c79b, 0x1c7ad, 0x1c7b5, 0x1c7bf, 0x1c7d5, 0x1c7e5, 0x1c7fd, 0x1c807, 0x1c813, 0x1c815, 0x1c819, 0x1c825, 0x1c837, 0x1c867, 0x1c86b, 0x1c879, 0x1c883, 0x1c889, 0x1c897, 0x1c8bf, 0x1c8cd, 0x1c8df, 0x1c8ef, 0x1c8f7, 0x1c8fd, 0x1c911, 0x1c91d, 0x1c933, 0x1c947, 0x1c94b, 0x1c955, 0x1c95f, 0x1c963, 0x1c969, 0x1c96f, 0x1c98d, 0x1c993, 0x1c995, 0x1c9a9, 0x1c9b1, 0x1c9bb, 0x1c9d7, 0x1c9db, 0x1ca03, 0x1ca11, 0x1ca2b, 0x1ca41, 0x1ca47, 
0x1ca53, 0x1ca59, 0x1ca65, 0x1ca8b, 0x1ca93, 0x1caa5, 0x1caa9, 0x1caaf, 0x1cab7, 0x1cabd, 0x1cae1, 0x1cae7, 0x1caf3, 0x1cb15, 0x1cb19, 0x1cb1f, 0x1cb23, 0x1cb51, 0x1cb5d, 0x1cb6b, 0x1cb85, 0x1cb89, 0x1cbab, 0x1cbbf, 0x1cbd3, 0x1cbe9, 0x1cbfb, 0x1cc27, 0x1cc41, 0x1cc4b, 0x1cc65, 0x1cc6f, 0x1cc7b, 0x1cc87, 0x1cc99, 0x1cca3, 0x1cca5, 0x1ccb7, 0x1ccdd, 0x1cced, 0x1ccf5, 0x1ccff, 0x1cd0d, 0x1cd25, 0x1cd37, 0x1cd4f, 0x1cd57, 0x1cd79, 0x1cd9d, 0x1cdc7, 0x1cdcb, 0x1cdd5, 0x1cddf, 0x1cde9, 0x1cdef, 0x1cdf1, 0x1cdfb, 0x1ce13, 0x1ce3b, 0x1ce3d, 0x1ce49, 0x1ce57, 0x1ce6d, 0x1ce7f, 0x1ce83, 0x1ce89, 0x1ce9d, 0x1cea7, 0x1cead, 0x1ceb5, 0x1cebf, 0x1ced3, 0x1ceef, 0x1cef1, 0x1cefd, 0x1cf05, 0x1cf09, 0x1cf1b, 0x1cf21, 0x1cf2b, 0x1cf33, 0x1cf3f, 0x1cf4d, 0x1cf63, 0x1cf65, 0x1cfbd, 0x1cfe1, 0x1d007, 0x1d00d, 0x1d019, 0x1d02f, 0x1d073, 0x1d089, 0x1d08f, 0x1d0bf, 0x1d0cd, 0x1d0d5, 0x1d0e5, 0x1d0e9, 0x1d0fb, 0x1d103, 0x1d10f, 0x1d11d, 0x1d12b, 0x1d12d, 0x1d139, 0x1d14b, 0x1d153, 0x1d171, 0x1d181, 0x1d193, 0x1d19f, 0x1d1c5, 0x1d1d7, 0x1d1f3, 0x1d211, 0x1d221, 0x1d227, 0x1d24d, 0x1d255, 0x1d263, 0x1d26f, 0x1d28d, 0x1d295, 0x1d29f, 0x1d2bb, 0x1d2cf, 0x1d2e1, 0x1d2eb, 0x1d2ed, 0x1d301, 0x1d30b, 0x1d32f, 0x1d33d, 0x1d35b, 0x1d35d, 0x1d37f, 0x1d389, 0x1d39b, 0x1d3a1, 0x1d3bf, 0x1d3c1, 0x1d3cb, 0x1d3d3, 0x1d3d9, 0x1d3e9, 0x1d3ef, 0x1d41d, 0x1d42b, 0x1d43f, 0x1d441, 0x1d453, 0x1d455, 0x1d45f, 0x1d471, 0x1d499, 0x1d4b1, 0x1d4bb, 0x1d4bd, 0x1d4c3, 0x1d4dd, 0x1d4e7, 0x1d4f3, 0x1d4f5, 0x1d4f9, 0x1d513, 0x1d51f, 0x1d52f, 0x1d549, 0x1d557, 0x1d567, 0x1d57f, 0x1d583, 0x1d589, 0x1d591, 0x1d5ab, 0x1d5ad, 0x1d5b5, 0x1d5bf, 0x1d5cb, 0x1d5e3, 0x1d5e5, 0x1d5ef, 0x1d5f1, 0x1d5f7, 0x1d601, 0x1d613, 0x1d615, 0x1d623, 0x1d625, 0x1d63b, 0x1d65d, 0x1d66b, 0x1d691, 0x1d6ab, 0x1d6b3, 0x1d6b5, 0x1d6cd, 0x1d6d9, 0x1d6fd, 0x1d717, 0x1d727, 0x1d72d, 0x1d735, 0x1d759, 0x1d75f, 0x1d77b, 0x1d77d, 0x1d787, 0x1d7b1, 0x1d7b7, 0x1d7c9, 0x1d7d1, 0x1d7db, 0x1d7eb, 0x1d80f, 0x1d827, 0x1d839, 0x1d853, 0x1d87d, 0x1d899, 0x1d8b7, 
0x1d8c3, 0x1d8c5, 0x1d8d1, 0x1d8f5, 0x1d8ff, 0x1d907, 0x1d90b, 0x1d90d, 0x1d91f, 0x1d931, 0x1d943, 0x1d945, 0x1d94f, 0x1d967, 0x1d975, 0x1d9b3, 0x1d9d3, 0x1d9f7, 0x1d9fb, 0x1da13, 0x1da15, 0x1da19, 0x1da31, 0x1da37, 0x1da43, 0x1da49, 0x1da5d, 0x1da67, 0x1da6d, 0x1da85, 0x1da9b, 0x1daa7, 0x1dab3, 0x1dab5, 0x1dacb, 0x1dad3, 0x1dad9, 0x1daef, 0x1db03, 0x1db0f, 0x1db35, 0x1db39, 0x1db47, 0x1db4b, 0x1db55, 0x1db63, 0x1db87, 0x1dba3, 0x1dba5, 0x1dbc5, 0x1dbd7, 0x1dbdd, 0x1dbed, 0x1dbf3, 0x1dbf9, 0x1dc0d, 0x1dc19, 0x1dc2f, 0x1dc3d, 0x1dc43, 0x1dc45, 0x1dc6d, 0x1dc7f, 0x1dc83, 0x1dc91, 0x1dc9b, 0x1dcab, 0x1dcad, 0x1dcb9, 0x1dcd3, 0x1dce3, 0x1dce5, 0x1dd0f, 0x1dd1b, 0x1dd1d, 0x1dd21, 0x1dd39, 0x1dd3f, 0x1dd47, 0x1dd4d, 0x1dd69, 0x1dd71, 0x1dd8b, 0x1dd95, 0x1dd99, 0x1dda3, 0x1ddcf, 0x1dde1, 0x1ddf3, 0x1de03, 0x1de05, 0x1de27, 0x1de4b, 0x1de4d, 0x1de65, 0x1de69, 0x1de7b, 0x1de81, 0x1de95, 0x1dea5, 0x1deaf, 0x1debd, 0x1dec3, 0x1decf, 0x1ded1, 0x1dedb, 0x1df01, 0x1df29, 0x1df37, 0x1df3b, 0x1df45, 0x1df49, 0x1df57, 0x1df73, 0x1df83, 0x1df85, 0x1df8f, 0x1df91, 0x1dfa1, 0x1dfcd, 0x1dfef, 0x1e013, 0x1e015, 0x1e023, 0x1e037, 0x1e045, 0x1e049, 0x1e05b, 0x1e061, 0x1e07f, 0x1e09d, 0x1e0ab, 0x1e0b3, 0x1e0b5, 0x1e0d5, 0x1e0d9, 0x1e0df, 0x1e0e9, 0x1e0fb, 0x1e117, 0x1e133, 0x1e14d, 0x1e165, 0x1e177, 0x1e193, 0x1e1a5, 0x1e1b7, 0x1e1c3, 0x1e1d1, 0x1e1db, 0x1e1ed, 0x1e209, 0x1e217, 0x1e21d, 0x1e22d, 0x1e233, 0x1e239, 0x1e25f, 0x1e28d, 0x1e2c5, 0x1e2eb, 0x1e2f9, 0x1e301, 0x1e31f, 0x1e325, 0x1e32f, 0x1e345, 0x1e351, 0x1e361, 0x1e373, 0x1e38f, 0x1e39b, 0x1e39d, 0x1e3ab, 0x1e3ad, 0x1e3c1, 0x1e3c7, 0x1e3e5, 0x1e3f7, 0x1e411, 0x1e42b, 0x1e439, 0x1e44b, 0x1e469, 0x1e46f, 0x1e471, 0x1e47d, 0x1e487, 0x1e48d, 0x1e4a3, 0x1e4cf, 0x1e4db, 0x1e4eb, 0x1e4f3, 0x1e501, 0x1e513, 0x1e537, 0x1e543, 0x1e567, 0x1e56b, 0x1e56d, 0x1e575, 0x1e57f, 0x1e59b, 0x1e5b5, 0x1e5bf, 0x1e5cd, 0x1e5d9, 0x1e5df, 0x1e5e5, 0x1e5fb, 0x1e607, 0x1e60b, 0x1e62f, 0x1e631, 0x1e63d, 0x1e643, 0x1e64f, 0x1e66b, 0x1e685, 0x1e689, 0x1e697, 
0x1e69d, 0x1e6a1, 0x1e6ad, 0x1e6b9, 0x1e6d5, 0x1e6e3, 0x1e6f1, 0x1e6f7, 0x1e709, 0x1e71b, 0x1e72d, 0x1e735, 0x1e73f, 0x1e741, 0x1e74b, 0x1e755, 0x1e763, 0x1e76f, 0x1e777, 0x1e781, 0x1e78b, 0x1e793, 0x1e79f, 0x1e7a5, 0x1e7af, 0x1e7bb, 0x1e7e1, 0x1e817, 0x1e833, 0x1e85f, 0x1e86f, 0x1e877, 0x1e87d, 0x1e881, 0x1e895, 0x1e899, 0x1e8a5, 0x1e8cf, 0x1e8d1, 0x1e8ed, 0x1e901, 0x1e923, 0x1e925, 0x1e929, 0x1e93b, 0x1e957, 0x1e96b, 0x1e973, 0x1e97f, 0x1e98f, 0x1e997, 0x1e9cd, 0x1e9e3, 0x1e9ef, 0x1e9f1, 0x1e9fb, 0x1e9fd, 0x1ea13, 0x1ea19, 0x1ea1f, 0x1ea29, 0x1ea43, 0x1ea51, 0x1ea61, 0x1ea89, 0x1ea91, 0x1ea9d, 0x1eaa7, 0x1eab9, 0x1eac1, 0x1eac7, 0x1ead5, 0x1eae5, 0x1eaef, 0x1eaf7, 0x1eafd, 0x1eb05, 0x1eb1b, 0x1eb21, 0x1eb2b, 0x1eb2d, 0x1eb41, 0x1eb55, 0x1eb7b, 0x1eb9f, 0x1eba9, 0x1ebb1, 0x1ebc3, 0x1ebc5, 0x1ebcf, 0x1ebeb, 0x1ec0b, 0x1ec2f, 0x1ec31, 0x1ec43, 0x1ec4f, 0x1ec5b, 0x1ec5d, 0x1ec67, 0x1ec6d, 0x1ec79, 0x1ec97, 0x1ecad, 0x1ecb3, 0x1ecb5, 0x1eccb, 0x1ecd9, 0x1ece5, 0x1ece9, 0x1ecfb, 0x1ed03, 0x1ed11, 0x1ed1d, 0x1ed27, 0x1ed39, 0x1ed5f, 0x1ed6f, 0x1ed81, 0x1ed95, 0x1ed9f, 0x1eda9, 0x1edbb, 0x1edcf, 0x1eddb, 0x1edf3, 0x1ee1b, 0x1ee27, 0x1ee2b, 0x1ee35, 0x1ee53, 0x1ee63, 0x1ee71, 0x1ee99, 0x1eeaf, 0x1eeb7, 0x1eebd, 0x1eedb, 0x1eee7, 0x1eeed, 0x1eef3, 0x1ef0d, 0x1ef23, 0x1ef2f, 0x1ef3d, 0x1ef57, 0x1ef67, 0x1ef73, 0x1ef79, 0x1ef85, 0x1ef97, 0x1efa1, 0x1efab, 0x1efad, 0x1efb3, 0x1efb9, 0x1efc1, 0x1efdf, 0x1efe3, 0x1eff7, 0x1f009, 0x1f02d, 0x1f039, 0x1f04d, 0x1f055, 0x1f05f, 0x1f065, 0x1f069, 0x1f087, 0x1f095, 0x1f0af, 0x1f0bd, 0x1f0d1, 0x1f0f9, 0x1f10b, 0x1f113, 0x1f137, 0x1f13b, 0x1f145, 0x1f157, 0x1f161, 0x1f175, 0x1f189, 0x1f18f, 0x1f1a7, 0x1f1ab, 0x1f1bf, 0x1f1cb, 0x1f1d3, 0x1f1e3, 0x1f207, 0x1f20d, 0x1f215, 0x1f229, 0x1f245, 0x1f249, 0x1f27f, 0x1f283, 0x1f297, 0x1f2a1, 0x1f2ab, 0x1f2b9, 0x1f2d5, 0x1f2df, 0x1f2f1, 0x1f317, 0x1f32d, 0x1f335, 0x1f359, 0x1f369, 0x1f36f, 0x1f381, 0x1f387, 0x1f393, 0x1f395, 0x1f399, 0x1f3a3, 0x1f3af, 0x1f3cf, 0x1f3f9, 0x1f41f, 0x1f423, 0x1f42f, 
0x1f43b, 0x1f43d, 0x1f451, 0x1f457, 0x1f46b, 0x1f489, 0x1f48f, 0x1f49d, 0x1f4a1, 0x1f4c1, 0x1f4d3, 0x1f4e9, 0x1f4f1, 0x1f4fb, 0x1f511, 0x1f51b, 0x1f51d, 0x1f527, 0x1f539, 0x1f553, 0x1f565, 0x1f56f, 0x1f571, 0x1f58b, 0x1f5c5, 0x1f5d7, 0x1f5ff, 0x1f60f, 0x1f621, 0x1f627, 0x1f62b, 0x1f635, 0x1f639, 0x1f655, 0x1f687, 0x1f693, 0x1f69f, 0x1f6a5, 0x1f6c5, 0x1f6dd, 0x1f6eb, 0x1f6f3, 0x1f6f9, 0x1f707, 0x1f715, 0x1f725, 0x1f743, 0x1f74f, 0x1f751, 0x1f767, 0x1f76b, 0x1f775, 0x1f783, 0x1f791, 0x1f7b3, 0x1f7b5, 0x1f7bf, 0x1f7cd, 0x1f7e9, 0x1f7ef, 0x1f7fb, 0x1f801, 0x1f825, 0x1f83d, 0x1f849, 0x1f857, 0x1f873, 0x1f8ad, 0x1f8bf, 0x1f8cb, 0x1f8d9, 0x1f8e3, 0x1f8f1, 0x1f905, 0x1f939, 0x1f93f, 0x1f94d, 0x1f953, 0x1f971, 0x1f977, 0x1f97b, 0x1f9bb, 0x1f9cf, 0x1f9d1, 0x1f9dd, 0x1f9e7, 0x1f9f5, 0x1fa17, 0x1fa21, 0x1fa27, 0x1fa3f, 0x1fa41, 0x1fa4b, 0x1fa59, 0x1fa7d, 0x1fa8b, 0x1fa93, 0x1fa95, 0x1faa5, 0x1fab1, 0x1fadb, 0x1fae7, 0x1faeb, 0x1fb01, 0x1fb1f, 0x1fb49, 0x1fb4f, 0x1fb57, 0x1fb5b, 0x1fb6d, 0x1fb7f, 0x1fb83, 0x1fb85, 0x1fb97, 0x1fb9d, 0x1fba7, 0x1fbab, 0x1fbc7, 0x1fbcb, 0x1fbcd, 0x1fbdf, 0x1fbe9, 0x1fbf1, 0x1fc0f, 0x1fc11, 0x1fc1b, 0x1fc2d, 0x1fc47, 0x1fc55, 0x1fc59, 0x1fc63, 0x1fc71, 0x1fc77, 0x1fc7d, 0x1fc8d, 0x1fc9f, 0x1fca9, 0x1fcbd, 0x1fcc9, 0x1fcdb, 0x1fce1, 0x1fce7, 0x1fced, 0x1fd07, 0x1fd0d, 0x1fd23, 0x1fd29, 0x1fd2f, 0x1fd3d, 0x1fd43, 0x1fd4f, 0x1fd57, 0x1fd73, 0x1fd75, 0x1fd85, 0x1fd97, 0x1fd9b, 0x1fdb3, 0x1fdbf, 0x1fdc1, 0x1fdd3, 0x1fde9, 0x1fdfb, 0x1fe15, 0x1fe25, 0x1fe31, 0x1fe37, 0x1fe49, 0x1fe67, 0x1fe79, 0x1fe83, 0x1fe91, 0x1fea1, 0x1fec1, 0x1fee5, 0x1fee9, 0x1fefb, 0x1fefd, 0x1ff05, 0x1ff33, 0x1ff35, 0x1ff39, 0x1ff5f, 0x1ff71, 0x1ff93, 0x1ffa5, 0x1ffb1, 0x1ffeb, 0x1ffed }; const word* irreducible_polynomials[17] = {NULL, NULL, _irreducible_polynomials_degree_02, _irreducible_polynomials_degree_03, _irreducible_polynomials_degree_04, _irreducible_polynomials_degree_05, _irreducible_polynomials_degree_06, _irreducible_polynomials_degree_07, 
_irreducible_polynomials_degree_08, _irreducible_polynomials_degree_09, _irreducible_polynomials_degree_10, _irreducible_polynomials_degree_11, _irreducible_polynomials_degree_12, _irreducible_polynomials_degree_13, _irreducible_polynomials_degree_14, _irreducible_polynomials_degree_15, _irreducible_polynomials_degree_16}; libm4rie-20130416/src/gf2e.h000066400000000000000000000106461212302364300152510ustar00rootroot00000000000000/** * \file gf2e.h * * \brief \GF2E * * \author Martin Albrecht */ #ifndef M4RIE_GF2E_H #define M4RIE_GF2E_H /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010,2011 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GPL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include #include #define M4RIE_MAX_DEGREE 16 /** * \brief \GF2E */ typedef struct gf2e_struct gf2e; struct gf2e_struct { unsigned int degree; /**< The degree \e. */ word minpoly; /**< Irreducible polynomial of degree \e. */ word *pow_gen; /**< pow_gen[i] holds \f$a^i / \f$ for \f$a\f$ a generator of this field. */ word *red; /**< red[i] holds precomputed reductors for the minpoly. \f$\f$. */ word **_mul; /**< mul[a][b] holds \f$ a \cdot b\f for small fields$. */ word (*inv)(const gf2e *ff, const word a); /**< implements a^(-1) for a in \GF2E */ word (*mul)(const gf2e *ff, const word a, const word b); /**< implements a*b for a in \GF2E */ }; /** * Create finite field from minimal polynomial * * \param minpoly Polynomial represented as series of bits. 
*/ gf2e *gf2e_init(const word minpoly); /** * Free ff * * \param ff Finite field. */ void gf2e_free(gf2e *ff); /** * \brief a^(-1) % minpoly */ static inline word gf2e_inv(const gf2e *ff, word a) { return gf2x_invmod(a, ff->minpoly, ff->degree); } /** * \brief a*b in \GF2E using a table lookups. */ static inline word _gf2e_mul_table(const gf2e *ff, const word a, const word b) { return ff->_mul[a][b]; } /** * \brief a*b in \GF2E using a gf2x_mul() lookups. */ static inline word _gf2e_mul_arith(const gf2e *ff, const word a, const word b) { const word res = gf2x_mul(a, b, ff->degree); return res ^ ff->red[res>>ff->degree]; } /** * \brief a*b in \GF2E. */ static inline word gf2e_mul(const gf2e *ff, const word a, const word b) { if( ff->_mul != NULL ) return _gf2e_mul_table(ff, a, b); else return _gf2e_mul_arith(ff, a, b); } /** * Return the width used for storing elements of ff * * \param ff Finite field. */ static inline size_t gf2e_degree_to_w(const gf2e *ff) { switch(ff->degree) { case 2: return 2; case 3: case 4: return 4; case 5: case 6: case 7: case 8: return 8; case 9: case 10: case 11: case 12: case 13: case 14: case 15: case 16: return 16; default: m4ri_die("degree %d not supported.\n",ff->degree); } return 0; } /** * Compute all multiples by a of vectors fitting into 16 bits. * * \param ff Finite field. * \param a Finite field element. 
*/ static inline word *gf2e_t16_init(const gf2e *ff, const word a) { word *mul = (word*)m4ri_mm_calloc(1<<16, sizeof(word)); const unsigned int w = gf2e_degree_to_w(ff); const word mask_w = (1<>0)&mask_w))<<0 | gf2e_mul(ff, a, ((i>> 2)&mask_w))<< 2 | gf2e_mul(ff, a, ((i>> 4)&mask_w))<< 4 | gf2e_mul(ff, a, ((i>> 6)&mask_w))<< 6; mul[i] |= gf2e_mul(ff, a, ((i>>8)&mask_w))<<8 | gf2e_mul(ff, a, ((i>>10)&mask_w))<<10 | gf2e_mul(ff, a, ((i>>12)&mask_w))<<12 | gf2e_mul(ff, a, ((i>>14)&mask_w))<<14; break; case 4: mul[i] = gf2e_mul(ff, a, (i&mask_w)) | gf2e_mul(ff, a, ((i>>4)&mask_w))<<4 | gf2e_mul(ff, a, ((i>>8)&mask_w))<<8 | gf2e_mul(ff, a, ((i>>12)&mask_w))<<12; break; case 8: mul[i] = gf2e_mul(ff, a, (i&mask_w)) | gf2e_mul(ff, a, ((i>>8)&mask_w))<<8; break; case 16: mul[i] = gf2e_mul(ff, a, (i&mask_w)); break; }; } return mul; } /** * \brief Free multiplication table. * * \param mul Multiplication table */ static inline void gf2e_t16_free(word *mul) { m4ri_mm_free(mul); } extern const word* irreducible_polynomials[17]; #endif //M4RIE_GF2E_H libm4rie-20130416/src/gf2x.h000066400000000000000000000127371212302364300152770ustar00rootroot00000000000000/** * \file gf2x.h * * \brief \GF2X for degrees < 64 * * \author Martin Albrecht * * \warning Do not rely on these functions for high performance, they are not fully optimised. */ #ifndef M4RIE_GF2X_H #define M4RIE_GF2X_H /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2012 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include #define __M4RIE_1tF(X) ~((X)-1) /**< Maps 1 to word with all ones and 0 to 0. */ /** * \brief a*b in \GF2X with deg(a) and deg(b) < d. */ static inline word gf2x_mul(const word a, const word b, unsigned int d) { word res = 0; switch(d) { case 32: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(31)) >>31) & (b<<31); case 31: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(30)) >>30) & (b<<30); case 30: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(29)) >>29) & (b<<29); case 29: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(28)) >>28) & (b<<28); case 28: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(27)) >>27) & (b<<27); case 27: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(26)) >>26) & (b<<26); case 26: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(25)) >>25) & (b<<25); case 25: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(24)) >>24) & (b<<24); case 24: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(23)) >>23) & (b<<23); case 23: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(22)) >>22) & (b<<22); case 22: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(21)) >>21) & (b<<21); case 21: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(20)) >>20) & (b<<20); case 20: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(19)) >>19) & (b<<19); case 19: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(18)) >>18) & (b<<18); case 18: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(17)) >>17) & (b<<17); case 17: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(16)) >>16) & (b<<16); case 16: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(15)) >>15) & (b<<15); case 15: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(14)) >>14) & (b<<14); case 14: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(13)) >>13) & (b<<13); case 13: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(12)) >>12) & (b<<12); case 12: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(11)) >>11) & (b<<11); case 11: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(10)) >>10) & (b<<10); case 10: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 9)) >> 9) & (b<< 9); case 
9: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 8)) >> 8) & (b<< 8); case 8: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 7)) >> 7) & (b<< 7); case 7: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 6)) >> 6) & (b<< 6); case 6: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 5)) >> 5) & (b<< 5); case 5: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 4)) >> 4) & (b<< 4); case 4: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 3)) >> 3) & (b<< 3); case 3: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 2)) >> 2) & (b<< 2); case 2: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 1)) >> 1) & (b<< 1); case 1: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 0)) >> 0) & (b<< 0); break; default: m4ri_die("degree %d too big.\n",d); } return res; } /** * \brief deg(a) in \GF2X. * * \param a Polynomial of degree <= 64. */ static inline int gf2x_deg(word a) { int degree = 0; if( (a & 0xffffffff00000000ULL) != 0) { degree += 32; a>>=32; } if( (a & 0xffff0000ULL) != 0) { degree += 16; a>>=16; } if( (a & 0xff00ULL) != 0) { degree += 8; a>>= 8; } if( (a & 0xf0ULL) != 0) { degree += 4; a>>= 4; } if( (a & 0xcULL) != 0) { degree += 2; a>>= 2; } if( (a & 0x2ULL) != 0) { degree += 1; a>>= 1; } return degree; } /** * \brief a / b in \GF2X. */ static inline word gf2x_div(word a, word b) { word res = 0; word mask = 0; const int deg_b = gf2x_deg(b); for(int deg_a=gf2x_deg(a); deg_a>=deg_b; deg_a--) { mask = __M4RIE_1tF(a>>deg_a); res |= mask & __M4RI_TWOPOW(deg_a - deg_b); a ^= mask & b<<(deg_a - deg_b); } return res; } /** * \brief a mod b in \GF2X. */ static inline word gf2x_mod(word a, word b) { const int deg_b = gf2x_deg(b); for(int deg_a=gf2x_deg(a); deg_a>=deg_b; deg_a--) { a ^= __M4RIE_1tF(a>>deg_a) & b<<(deg_a - deg_b); } return a; } /** * \brief a / b and a mod b in \GF2X. 
*/ static inline word gf2x_divmod(word a, word b, word *rem) { word res = 0; word mask = 0; const int deg_b = gf2x_deg(b); for(int deg_a=gf2x_deg(a); deg_a>=deg_b; deg_a--) { mask = __M4RIE_1tF(a>>deg_a); res |= mask & __M4RI_TWOPOW(deg_a - deg_b); a ^= mask & b<<(deg_a - deg_b); } *rem = a; return res; } /** * \brief a^(-1) % b with deg(a), deg(b) <= d. */ static inline word gf2x_invmod(word a, word b, unsigned int d) { word x = 0; word lastx = 1; word y = 1; word lasty = 0; word rem; word quo; word tmp = 0; while (b != 0) { quo = gf2x_divmod(a,b, &rem); a = b; b = rem; tmp = x; x = lastx ^ gf2x_mul(quo, x, d); lastx = tmp; tmp = y; y = lasty ^ gf2x_mul(quo, y, d); lasty = tmp; } return lastx; } #endif //M4RIE_GF2X_H libm4rie-20130416/src/m4ri_functions.h000066400000000000000000000054231212302364300173660ustar00rootroot00000000000000/** * \file m4ri_functions.h * * \brief Utility functions handly mzd_t * * \note Some of these functions might be moved M4RI in the future. * * \author Martin Albrecht */ #ifndef M4RIE_M4RI_FUNCTIONS_H #define M4RIE_M4RI_FUNCTIONS_H /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010,2011 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include #include static inline word __mzd_read_bits(const mzd_t *M, const rci_t x, const rci_t y, const rci_t n) { int const spot = (y + M->offset) % m4ri_radix; wi_t const block = (y + M->offset) / m4ri_radix; int const spill = spot + n - m4ri_radix; word temp = M->rows[x][block] << -spill; return temp >> (m4ri_radix - n); } static inline void __mzd_xor_bits(const mzd_t *M, const rci_t x, const rci_t y, const rci_t n, word values) { int const spot = (y + M->offset) % m4ri_radix; wi_t const block = (y + M->offset) / m4ri_radix; M->rows[x][block] ^= values << spot; } static inline void __mzd_clear_bits(const mzd_t *M, const rci_t x, const rci_t y, const rci_t n) { word values = m4ri_ffff >> (m4ri_radix - n); int const spot = (y + M->offset) % m4ri_radix; wi_t const block = (y + M->offset) / m4ri_radix; M->rows[x][block] &= ~(values << spot); } /** * \brief Add n elements to A * * A += B[0] + ... + B[n-1] * * \param A Matrix * \param n Number of elements in list * \param ... Matrices */ static inline mzd_t *mzd_sum(mzd_t *A, const int n, ...) { assert(n>1); va_list b_list; va_start( b_list, n ); mzd_add(A, va_arg(b_list, mzd_t *), va_arg(b_list, mzd_t *)); for( int i = 0 ; i < n-2; i++ ) { mzd_t *B = va_arg(b_list, mzd_t *); mzd_add(A, A, B); } va_end( b_list ); return A; } /** * \brief Add A to n elements * * B[0] += A, ..., B[n-1] += A * * \param A Matrix * \param n Number of elements in list * \param ... Matrices */ static inline mzd_t *mzd_add_to_all(mzd_t *A, const int n, ...) 
{ va_list b_list; va_start( b_list, n ); for( int i = 0 ; i < n; i++ ) { mzd_t *B = va_arg(b_list, mzd_t *); mzd_add(B, B, A); } va_end( b_list ); return A; } #endif //M4RIE_M4RI_FUNCTIONS_H libm4rie-20130416/src/m4rie.h000066400000000000000000000057421212302364300154470ustar00rootroot00000000000000/** * \file m4rie.h * \brief Main include file for the M4RIE library. * * \author Martin Albrecht * * \mainpage * * M4RIE is a library to do fast compations with dense matrices over * \GF2E for small \e. M4RIE is available under the GPLv2+. * * The two fundamental data types of this library are mzed_t and * mzd_slice_t. For big matrices, i.e., those which do not fit into L2 * cache, it is recommended to use mzd_slice_t and for smaller * matrices mzed_t will be slightly faster and use less memory. * * Function names follow the pattern \verbatim [_]_[type]_[what]_[algorithm] \endverbatim * * Function names beginning with an underscore perform less * consistency checks (matching dimensions, matching fields) than * those without, e.g., _mzed_ple() is called by mzed_ple() after some * checks were performed. * * For both data types almost all functions are the same, e.g., there * is a function mzd_slice_add() and there also should be a function * mzed_add() with the same signature except for the matrix type. * * Functions which do not specify an algorithm choose the best * available algorithm (based on some heuristic), e.g., mzed_ple() * might call mzed_ple_newton_john(). 
* * \defgroup Definitions Type definitions * \defgroup Constructions Constructions * \defgroup Assignment Assignment and basic manipulation * \defgroup RowOperations Operations on rows * \defgroup StringConversions String conversions and I/O * \defgroup Addition Addition and subtraction * \defgroup Multiplication Multiplication * \defgroup PLE PLE and PLUQ decomposition * \defgroup Echelon Echelon forms * \defgroup Triangular Triangular matrices * * \example tests/test_elimination.cc tests/test_multiplication.cc */ #ifndef M4RIE_H #define M4RIE_H /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GPL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #ifdef __cplusplus extern "C" { #endif //__cplusplus #include #include #include #include #include #include #include #include #include #include #include #ifdef __cplusplus } #endif //__cplusplus #endif //M4RIE_H libm4rie-20130416/src/mzd_poly.c000066400000000000000000000077641212302364300162650ustar00rootroot00000000000000#include "mzd_poly.h" #include "mzd_slice.h" #include "newton_john.h" void _poly_addmul2(mzd_t **X, const mzd_t **a, const mzd_t **b) { mzd_t *t0 = mzd_init(a[0]->nrows, a[0]->ncols); mzd_t *t1 = mzd_init(b[0]->nrows, b[0]->ncols); mzd_add(t0, a[0], a[1]); mzd_add(t1, b[0], b[1]); mzd_addmul(X[1], t0, t1, 0); /* + (a0+a1)(b0+b1)X */ mzd_free(t0); mzd_free(t1); t0 = mzd_init(a[0]->nrows, b[0]->ncols); mzd_mul(t0, a[0], b[0], 0); /* + a0b0(1-X) */ mzd_add(X[0], X[0], t0); mzd_add(X[1], X[1], t0); mzd_mul(t0, a[1], b[1], 0); /* + a1b1(X+X^2) */ mzd_add(X[1], X[1], t0); mzd_add(X[2], X[2], t0); mzd_free(t0); } void _poly_addmul4(mzd_t **c, const mzd_t **a, const mzd_t **b) { const mzd_t *a0[2] = {a[0],a[1]}; const mzd_t *a1[2] = {a[2],a[3]}; const mzd_t *b0[2] = {b[0],b[1]}; const mzd_t *b1[2] = {b[2],b[3]}; mzd_t *X[3][3] = { {c[0],c[1],c[2]}, {c[2],c[3],c[4]}, {c[4],c[5],c[6]} }; mzd_t *t0[3]; mzd_t *t1[2]; t0[0] = mzd_init(a[0]->nrows, a[0]->ncols); t0[1] = mzd_init(a[0]->nrows, a[0]->ncols); t1[0] = mzd_init(b[0]->nrows, b[0]->ncols); t1[1] = mzd_init(b[0]->nrows, b[0]->ncols); _poly_add(t0, a0, a1, 2); _poly_add(t1, b0, b1, 2); _poly_addmul2(X[1], (const mzd_t**)t0, (const mzd_t**)t1); mzd_free(t0[0]); mzd_free(t0[1]); mzd_free(t1[0]); mzd_free(t1[1]); t0[0] = mzd_init(a[0]->nrows, b[0]->ncols); t0[1] = mzd_init(a[0]->nrows, b[0]->ncols); t0[2] = mzd_init(a[0]->nrows, b[0]->ncols); _poly_addmul2(t0, a0, b0); _poly_add(X[0], (const mzd_t**)X[0], (const mzd_t**)t0, 3); _poly_add(X[1], (const 
mzd_t**)X[1], (const mzd_t**)t0, 3); mzd_set_ui(t0[0], 0); mzd_set_ui(t0[1], 0); mzd_set_ui(t0[2], 0); _poly_addmul2(t0, a1, b1); _poly_add(X[1], (const mzd_t**)X[1], (const mzd_t**)t0, 3); _poly_add(X[2], (const mzd_t**)X[2], (const mzd_t**)t0, 3); mzd_free(t0[0]); mzd_free(t0[1]); mzd_free(t0[2]); } static inline mzd_slice_t * mzd_slice_addmul_mzd(mzd_slice_t *C, const word a, const mzd_t *A) { for(int i=0; idepth; i++) if(a & 1ULL<x[i], C->x[i], A); return C; } mzd_poly_t *_mzd_poly_addmul1(mzd_poly_t *C, mzd_poly_t *A, mzd_poly_t *B) { const int d = A->depth + B->depth-1; const int log2d = (int)ceil(log2( (double)d )); assert(log2d <= 16); gf2e *ff = gf2e_init(irreducible_polynomials[log2d][1]); mzd_slice_t *a = mzd_slice_init(ff, A->nrows, A->ncols); mzd_slice_t *b = mzd_slice_init(ff, B->nrows, B->ncols); mzd_slice_t **c = (mzd_slice_t**)calloc(sizeof(mzd_slice_t*),d); mzed_t *Phi = mzed_init(ff, d, d); mzed_t *Rho = mzed_init(ff, d, d); /* evaluation at zero */ mzed_write_elem(Phi, 0, 0, 1); c[0] = mzd_slice_init(ff, A->nrows, B->ncols); mzd_mul(c[0]->x[0], A->x[0], B->x[0], 0); /* evaluation at one */ for(int i=0; idepth; i++) mzd_add(a->x[0], a->x[0], A->x[i]); for (int i = 0; i < B->depth; i++) mzd_add(b->x[0], b->x[0], B->x[i]); c[1] = mzd_slice_init(ff, A->nrows, B->ncols); mzd_mul(c[1]->x[0], a->x[0], b->x[0], 0); /* evaluation at 2 ... d-1 */ for(int i=2; idepth) mzd_slice_addmul_mzd(a, acc, A->x[j]); if (j < B->depth) mzd_slice_addmul_mzd(b, acc, B->x[j]); acc = ff->mul(ff, alpha, acc); } c[i] = mzd_slice_mul(NULL, a, b); } mzd_slice_free(a); mzd_slice_free(b); mzed_invert_newton_john(Rho, Phi); mzd_slice_t *tmp = mzd_slice_init(ff, A->nrows, B->ncols); for(int i=0; ix[i], C->x[i], tmp->x[0]); } } mzd_slice_free(tmp); for(int i=0; i * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. 
* * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include #include /******************************************************************** * Internal representation *******************************************************************/ static inline void _poly_add(mzd_t **c, const mzd_t **a, const mzd_t **b,const unsigned int length) { switch(length) { case 32: mzd_add(c[31], a[31], b[31]); case 31: mzd_add(c[30], a[30], b[30]); case 30: mzd_add(c[29], a[29], b[29]); case 29: mzd_add(c[28], a[28], b[28]); case 28: mzd_add(c[27], a[27], b[27]); case 27: mzd_add(c[26], a[26], b[26]); case 26: mzd_add(c[25], a[25], b[25]); case 25: mzd_add(c[24], a[24], b[24]); case 24: mzd_add(c[23], a[23], b[23]); case 23: mzd_add(c[22], a[22], b[22]); case 22: mzd_add(c[21], a[21], b[21]); case 21: mzd_add(c[20], a[20], b[20]); case 20: mzd_add(c[19], a[19], b[19]); case 19: mzd_add(c[18], a[18], b[18]); case 18: mzd_add(c[17], a[17], b[17]); case 17: mzd_add(c[16], a[16], b[16]); case 16: mzd_add(c[15], a[15], b[15]); case 15: mzd_add(c[14], a[14], b[14]); case 14: mzd_add(c[13], a[13], b[13]); case 13: mzd_add(c[12], a[12], b[12]); case 12: mzd_add(c[11], a[11], b[11]); case 11: mzd_add(c[10], a[10], b[10]); case 10: mzd_add(c[ 9], a[ 9], b[ 9]); case 9: mzd_add(c[ 8], a[ 8], b[ 8]); case 8: mzd_add(c[ 7], a[ 7], b[ 7]); case 7: mzd_add(c[ 6], a[ 6], b[ 6]); case 6: mzd_add(c[ 5], a[ 5], b[ 5]); case 5: mzd_add(c[ 4], a[ 4], b[ 4]); case 4: mzd_add(c[ 3], a[ 3], b[ 3]); case 3: mzd_add(c[ 2], a[ 2], b[ 2]); case 2: mzd_add(c[ 1], a[ 1], b[ 1]); case 1: mzd_add(c[ 0], a[ 0], b[ 0]); case 0: break; default: for(int i=0; ix+offset, (const 
mzd_t**)A->x, (const mzd_t**)B->x, A->depth); return C; } static inline mzd_poly_t *mzd_poly_add(mzd_poly_t *C, const mzd_poly_t *A, const mzd_poly_t *B) { assert(C->depth >= A->depth && A->depth == B->depth); return _mzd_poly_add(C, A, B, 0); } static inline mzd_poly_t *mzd_poly_init(const deg_t d, const rci_t m, const rci_t n) { mzd_poly_t *A = (mzd_poly_t*)m4ri_mm_malloc(sizeof(mzd_poly_t)); A->x = (mzd_t**)m4ri_mm_malloc(sizeof(mzd_t*)*(d+1)); A->nrows = m; A->ncols = n; A->depth = d+1; for(int i=0; idepth; i++) A->x[i] = mzd_init(m,n); return A; } static inline void mzd_poly_free(mzd_poly_t *A) { for(int i=0; idepth; i++) mzd_free(A->x[i]); #if __M4RI_USE_MM_MALLOC _mm_free(A); #else free(A); #endif } static inline mzd_poly_t *_mzd_poly_adapt_depth(mzd_poly_t *A, const deg_t new_depth) { if (new_depth < A->depth) { for(int i=new_depth; idepth; i++) { mzd_free(A->x[i]); A->x[i] = NULL; } } else { for(int i=A->depth; ix[i] = mzd_init(A->nrows,A->ncols); } } A->depth = new_depth; return A; } static inline mzd_poly_t *_mzd_poly_addmul_naive(mzd_poly_t *C, const mzd_poly_t *A, const mzd_poly_t *B) { if (C == NULL) C = mzd_poly_init(A->depth+B->depth-1, A->nrows, B->ncols); for(unsigned int i=0; idepth; i++) { for(unsigned int j=0; jdepth; j++) { mzd_addmul(C->x[i+j], A->x[i], B->x[j], 0); } } return C; } mzd_poly_t *_mzd_poly_addmul1(mzd_poly_t *C, mzd_poly_t *A, mzd_poly_t *B); /** * \brief Return -1,0,1 if if A < B, A == B or A > B respectively. * * \param A Matrix. * \param B Matrix. * * \note This comparison is not well defined (except for !=0) mathematically and relatively * arbitrary. * * \ingroup Comparison */ static inline int mzd_poly_cmp(mzd_poly_t *A, mzd_poly_t *B) { int r = 0; if ((A->depth != B->depth) ) { if (A->depth < B->depth) return -1; else return 1; } for(int i=0; idepth; i++) r |= mzd_cmp(A->x[i],B->x[i]); return r; } /** * \brief Fill matrix A with random elements. * * \param A Matrix * * \todo Allow the user to provide a RNG callback. 
* * \ingroup Assignment */ static inline void mzd_poly_randomize(mzd_poly_t *A) { for(int i=0; idepth; i++) mzd_randomize(A->x[i]); } #endif //M4RIE_MZD_POLY_H libm4rie-20130416/src/mzd_slice.c000066400000000000000000000605611212302364300163730ustar00rootroot00000000000000/****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010,2011 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include "mzd_slice.h" #include "m4ri_functions.h" /** * \brief Add A to coefficient of X^t but perform modular reductions on the fly. * * (A + X^t % minpoly) * * \param ff Finite field * \param A Matrix * \param X Matrix list * \param t Integer >= 0 (degree) */ static inline void mzd_add_modred(const gf2e *ff, const mzd_t *A, mzd_t **X, const int t) { if (mzd_is_zero(A)) return; if (t < ff->degree) { mzd_add(X[t], X[t], A); return; } word pow_gen = ff->pow_gen[t]; for(int i=0; idegree; i++) { if (pow_gen & (1< 0 */ static inline mzd_t *mzd_add_to_all_modred(const gf2e *ff, mzd_t *A, mzd_t **X, const int n, ...) 
{ va_list b_list; va_start( b_list, n ); for( int i = 0 ; i < n; i++ ) { int t = va_arg(b_list, int); mzd_add_modred(ff, A, X, t); } va_end( b_list ); return A; } mzd_slice_t *mzd_slice_mul_scalar(mzd_slice_t *C, const word a, const mzd_slice_t *B) { if(C == NULL) C = mzd_slice_init(B->finite_field, B->nrows, B->ncols); else mzd_slice_set_ui(C, 0); assert( (C->finite_field == B->finite_field) && (((C->nrows ^ B->nrows) | (C->ncols ^ B->ncols)) == 0)); const gf2e *ff = B->finite_field; for(int i=0; idegree; i++) { if(a&(1<depth; j++) mzd_add_modred(ff, B->x[j], C->x, i+j); } } return C; } mzd_slice_t *mzd_slice_addmul_scalar(mzd_slice_t *C, const word a, const mzd_slice_t *B) { assert( (C->finite_field == B->finite_field) && (((C->nrows ^ B->nrows) | (C->ncols ^ B->ncols)) == 0)); const gf2e *ff = B->finite_field; for(int i=0; idegree; i++) { if(a&(1<depth; j++) mzd_add_modred(ff, B->x[j], C->x, i+j); } } return C; } void mzd_slice_set_ui(mzd_slice_t *A, word value) { for(int i=0; idepth; i++) { mzd_set_ui(A->x[i], (value>>i)&1); } } void mzd_slice_print(const mzd_slice_t *A) { char formatstr[10]; int width = gf2e_degree_to_w(A->finite_field)/4; if (gf2e_degree_to_w(A->finite_field)%4) width += 1; sprintf(formatstr,"%%%dx",width); for (rci_t i=0; i < A->nrows; ++i) { printf("["); for (rci_t j=0; j < A->ncols; j++) { word tmp = mzd_slice_read_elem(A,i,j); printf(formatstr,(int)tmp); if(jncols-1) printf(" "); } printf("]\n"); } } mzd_slice_t *_mzd_slice_mul_naive(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { if (C == NULL) C = mzd_slice_init(A->finite_field, A->nrows, B->ncols); const unsigned int e = A->finite_field->degree; mzd_t *t0 = mzd_init(A->nrows, B->ncols); for(unsigned int i=0; ix[i], B->x[j], 0); mzd_add_modred(A->finite_field, t0, C->x, i+j); } } mzd_free(t0); return C; } mzd_slice_t *_mzd_slice_mul_karatsuba2(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { // two temporaries if (C == NULL) C = mzd_slice_init(A->finite_field, 
A->nrows, B->ncols); mzd_addmul(C->x[0], A->x[1], B->x[1], 0); /* C0 += A1*B1 */ mzd_t *T0 = mzd_addmul(NULL, A->x[0], B->x[0], 0); /* A0B0 = A0*B0 */ mzd_add(C->x[0], C->x[0], T0); /*C0 += A0*B0 */ mzd_add(C->x[1], C->x[1], T0); /*C1 += A0*B0 */ mzd_free(T0); T0 = mzd_add(NULL, A->x[1], A->x[0]); /*T0 = A1 + A0 */ mzd_t *T1 = mzd_add(NULL, B->x[1], B->x[0]); /*T1 = B1 + B0 */ mzd_addmul(C->x[1], T0, T1, 0); /* C1 += A0*B0 + T0*T1 */ mzd_free(T0); mzd_free(T1); return C; } mzd_slice_t *_mzd_slice_mul_karatsuba3(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { /* using three temporary matrices */ if (C == NULL) C = mzd_slice_init(A->finite_field, A->nrows, B->ncols); C = _mzd_slice_adapt_depth(C,4); const mzd_t *a0 = A->x[0]; const mzd_t *a1 = A->x[1]; const mzd_t *a2 = A->x[2]; const mzd_t *b0 = B->x[0]; const mzd_t *b1 = B->x[1]; const mzd_t *b2 = B->x[2]; mzd_t *t0 = mzd_init(a0->nrows, a0->ncols); mzd_t *t1 = mzd_init(b0->nrows, b0->ncols); mzd_t **X = C->x; mzd_add(t0, a0, a1); mzd_add(t1, b0, b1); mzd_addmul(X[1], t0, t1, 0); /* + (a0+a1)(b0+b1)X */ mzd_add(t0, a0, a2); mzd_add(t1, b0, b2); mzd_addmul(X[2], t0, t1, 0); /* + (a0+a2)(b0+b2)X^2 */ mzd_add(t0, a1, a2); mzd_add(t1, b1, b2); mzd_addmul(X[3], t0, t1, 0); /* + (a1+a2)(b1+b2)X^3 */ mzd_free(t0); mzd_free(t1); t0 = mzd_init(a0->nrows, b0->ncols); mzd_mul(t0, a0, b0, 0); /* + a0b0(1-X-X^2) */ mzd_add(X[0], X[0], t0); mzd_add(X[1], X[1], t0); mzd_add(X[2], X[2], t0); mzd_mul(t0, a1, b1, 0); /* + a1b1(X+X^2-X^3) */ mzd_add(X[1], X[1], t0); mzd_add(X[2], X[2], t0); mzd_add(X[3], X[3], t0); mzd_mul(t0, a2, b2, 0); /* + a2b2(-X^2-X^3+X^4) */ /* modular reductions and final additions */ if( (A->finite_field->minpoly & 1<<2) == 0) mzd_add(X[3], X[3], t0); else mzd_add(X[2], X[2], t0); mzd_add(X[1], X[1], t0); if(A->finite_field->minpoly & 1<<2) mzd_add(X[2],X[2],X[3]); else //if (A->finite_field->minpoly & 1<<1) {= mzd_add(X[1],X[1],X[3]); mzd_add(X[0],X[0],X[3]); mzd_free(t0); 
_mzd_slice_adapt_depth(C,3);
  /* ^ closing statements of _mzd_slice_mul_karatsuba3, whose body starts
   *   before this point. */

  return C;
}

/**
 * Karatsuba-style product of two bitsliced matrices using slices 0..3 of
 * A and B.
 *
 * Nine mzd_mul() products of GF(2) matrices are formed. Each one is handed
 * to mzd_add_to_all_modred() together with a count and a list of exponents;
 * that helper adds the product to the slice of C for every listed exponent
 * via mzd_add_modred(), which is documented to reduce modulo the field's
 * minimal polynomial on the fly.
 *
 * The products are added onto the slices of C; a caller-supplied C is not
 * cleared in this function. When C is NULL a matrix is obtained from
 * mzd_slice_init() (presumably zero-filled - TODO confirm against M4RI's
 * mzd_init()).
 *
 * \param C Return matrix, may be NULL for automatic creation.
 * \param A Input matrix A; slices x[0..3] are read.
 * \param B Input matrix B; slices x[0..3] are read.
 *
 * \return C (the newly created matrix if C was NULL).
 */
mzd_slice_t *_mzd_slice_mul_karatsuba4(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) {
  /* using three temporary matrices: t0 (products), t1 (sums of A slices),
   * t2 (sums of B slices) */

  if (C == NULL)
    C = mzd_slice_init(A->finite_field, A->nrows, B->ncols);

  const gf2e *ff = A->finite_field;

  /* shorthands for the coefficient matrices of A and B */
  const mzd_t *a0 = A->x[0];
  const mzd_t *a1 = A->x[1];
  const mzd_t *a2 = A->x[2];
  const mzd_t *a3 = A->x[3];

  const mzd_t *b0 = B->x[0];
  const mzd_t *b1 = B->x[1];
  const mzd_t *b2 = B->x[2];
  const mzd_t *b3 = B->x[3];

  mzd_t **X = C->x;

  /* t0 has the shape of a product, t1 the shape of A's slices and t2 the
   * shape of B's slices */
  mzd_t *t0 = mzd_init(a0->nrows, b0->ncols);
  mzd_t *t1 = mzd_init(a0->nrows, a1->ncols);
  mzd_t *t2 = mzd_init(b0->nrows, b1->ncols);

  /* In every call below the integer following X is the number of exponents
   * that follow it; it must match the length of the list.
   * NOTE(review): mzd_sum() is defined outside this view; it appears to
   * write the sum of the listed matrices into its first argument and return
   * it - confirm in m4ri_functions.h. */

  /* (a0 + a1 + a2 + a3)*(b0 + b1 + b2 + b3)*X^3 */
  mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 4, a0, a1, a2, a3), mzd_sum(t2, 4, b0, b1, b2, b3), 0), X, 1, 3);

  /* (a0 + a1)*(b0 + b1)*(X + X^3) */
  mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a0, a1), mzd_sum(t2, 2, b0, b1), 0), X, 2, 1, 3);

  /* (a0 + a2)*(b0 + b2)*(X^2 + X^3) */
  mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a0, a2), mzd_sum(t2, 2, b0, b2), 0), X, 2, 2, 3);

  /* (a1 + a3)*(b1 + b3)*(X^3 + X^4) */
  mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a1, a3), mzd_sum(t2, 2, b1, b3), 0), X, 2, 3, 4);

  /* (a2 + a3)*(b2 + b3)*(X^3 + X^5) */
  mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a2, a3), mzd_sum(t2, 2, b2, b3), 0), X, 2, 3, 5);

  /* (a0*b0)*(1 + X + X^2 + X^3) */
  mzd_add_to_all_modred(ff, mzd_mul(t0, a0, b0, 0), X, 4, 0, 1, 2, 3);

  /* (a1*b1)*(X + X^2 + X^3 + X^4) */
  mzd_add_to_all_modred(ff, mzd_mul(t0, a1, b1, 0), X, 4, 1, 2, 3, 4);

  /* (a2*b2)*(X^2 + X^3 + X^4 + X^5) */
  mzd_add_to_all_modred(ff, mzd_mul(t0, a2, b2, 0), X, 4, 2, 3, 4, 5);

  /* (a3*b3)*(X^3 + X^4 + X^5 + X^6) */
  mzd_add_to_all_modred(ff, mzd_mul(t0, a3, b3, 0), X, 4, 3, 4, 5, 6);

  /* release the three temporaries */
  mzd_free(t0);
  mzd_free(t1);
  mzd_free(t2);

  return C;
}

mzd_slice_t *_mzd_slice_mul_karatsuba5(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) {
  /* using three temporary matrices*/

  if (C ==
NULL) C = mzd_slice_init(A->finite_field, A->nrows, B->ncols); const gf2e *ff = A->finite_field; const mzd_t *a0 = A->x[0]; const mzd_t *a1 = A->x[1]; const mzd_t *a2 = A->x[2]; const mzd_t *a3 = A->x[3]; const mzd_t *a4 = A->x[4]; const mzd_t *b0 = B->x[0]; const mzd_t *b1 = B->x[1]; const mzd_t *b2 = B->x[2]; const mzd_t *b3 = B->x[3]; const mzd_t *b4 = B->x[4]; mzd_t **X = C->x; mzd_t *t0 = mzd_init(a0->nrows, b0->ncols); /* a0b0(X^6 + X^5 + X^4 + X^2 + X + 1)*/ mzd_add_to_all_modred(ff, mzd_mul(t0, a0, b0, 0), X, 6, 6, 5, 4, 2, 1, 0); /* a1b1(X^4 + X)*/ mzd_add_to_all_modred(ff, mzd_mul(t0, a1, b1, 0), X, 2, 4, 1); /* a3b3(X^7 + X^4) */ mzd_add_to_all_modred(ff, mzd_mul(t0, a3, b3, 0), X, 2, 7, 4); /* (a4b4)(X^8 + X^7 + X^6 + X^4 + X^3 + X^2) */ mzd_add_to_all_modred(ff, mzd_mul(t0, a4, b4, 0), X, 6, 8, 7, 6, 4, 3, 2); mzd_t *t1 = mzd_init(a0->nrows, a1->ncols); mzd_t *t2 = mzd_init(b0->nrows, b1->ncols); /* (a0+a4)(b0+b4)(X^6 + X^5 + X^3 + X^2) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a0, a4), mzd_sum(t2, 2, b0, b4), 0), X, 4, 6, 5, 3, 2); /* (a0+a1)(b0+b1)(X^5 + X^4 + X^2 + X) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a0, a1), mzd_sum(t2, 2, b0, b1), 0), X, 4, 5, 4, 2, 1); /* (a3+a4)(b3+b4)(X^7 + X^6 + X^4 + X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a3, a4), mzd_sum(t2, 2, b3, b4), 0), X, 4, 7, 6, 4, 3); /* (a1+a2+a4)(a1+a2+a4)(X^4 + X^2) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 3, a1, a2, a4), mzd_sum(t2, 3, b1, b2, b4), 0), X, 2, 4, 2); /* (a0+a2+a3)(b0+b2+b3)(X^6 + X^4) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 3, a0, a2, a3), mzd_sum(t2, 3, b0, b2, b3), 0), X, 2, 6, 4); /* (a0+a1+a3+a4)(b0+b1+b3+b4)(X^5 + X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 4, a0, a1, a3, a4), mzd_sum(t2, 4, b0, b1, b3, b4), 0), X, 2, 5, 3); /* (a0+a1+a2+a4)(b0+b1+b2+b4)(X^5 + X^2) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 4, a0, a1, a2, a4), mzd_sum(t2, 4, b0, b1, b2, b4), 0), X, 2, 5, 
2); /* (a0+a2+a3+a4)(b0+b2+b3+b4)(X^6 + X^3)*/ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 4, a0, a2, a3, a4), mzd_sum(t2, 4, b0, b2, b3, b4), 0), X, 2, 6, 3); /* (a0+a1+a2+a3+a4)(b0+b1+b2+b3+b4)(X^5 + X^4 + X^3)*/ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 5, a0, a1, a2, a3, a4), mzd_sum(t2, 5, b0, b1, b2, b3, b4), 0), X, 3, 5, 4, 3); mzd_free(t0); mzd_free(t1); mzd_free(t2); return C; } mzd_slice_t *_mzd_slice_mul_karatsuba6(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { /* using three temporaries */ if (C == NULL) C = mzd_slice_init(A->finite_field, A->nrows, B->ncols); const gf2e *ff = A->finite_field; const mzd_t *a0 = A->x[0]; const mzd_t *a1 = A->x[1]; const mzd_t *a2 = A->x[2]; const mzd_t *a3 = A->x[3]; const mzd_t *a4 = A->x[4]; const mzd_t *a5 = A->x[5]; const mzd_t *b0 = B->x[0]; const mzd_t *b1 = B->x[1]; const mzd_t *b2 = B->x[2]; const mzd_t *b3 = B->x[3]; const mzd_t *b4 = B->x[4]; const mzd_t *b5 = B->x[5]; mzd_t **X = C->x; mzd_t *t0 = mzd_init(a0->nrows, b0->ncols); mzd_t *t1 = mzd_init(a0->nrows, a1->ncols); mzd_t *t2 = mzd_init(b0->nrows, b1->ncols); /* a5b5 (X^10 + X^9 + X^6 + X^5) */ mzd_add_to_all_modred(ff, mzd_mul(t0, a5, b5, 0), X, 4, 10, 9, 6, 5); /* a4b4 (X^9 + X^7 + X^5 + X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, a4, b4, 0), X, 4, 9, 7, 5, 3); /* a1b1 (X^7 + X^6 + X^5 + X^4 + X^3 + X) */ mzd_add_to_all_modred(ff, mzd_mul(t0, a1, b1, 0), X, 6, 7, 6, 5, 4, 3, 1); /* a0b0 (X^6 + X^5 + X + 1) */ mzd_add_to_all_modred(ff, mzd_mul(t0, a0, b0, 0), X, 4, 6, 5, 1, 0); /* (a4 + a5)(b4 + b5) (X^9 + X^8 + X^4+ X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a4, a5), mzd_sum(t2, 2, b4, b5), 0), X, 4, 9, 8, 4, 3); /* (a0 + a1)(b0 + b1) (X^7 + X^4 + X^2 + X) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a0, a1), mzd_sum(t2, 2, b0, b1), 0), X, 4, 7, 4, 2, 1); /* (a3 + a4)(b3 + b4)(X^8 + X^7 + X^6 + X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a3, a4), mzd_sum(t2, 2, b3, b4), 0), X, 
4, 8, 7, 6, 3); /* (a1 + a2)(b1 + b2) (X^7 + X^6 + X^3 + X^2) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a1, a2), mzd_sum(t2, 2, b1, b2), 0), X, 4, 7, 6, 3, 2); /* (a1 + a4)(b1 + b4) (X^4 + X^5) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a1, a4), mzd_sum(t2, 2, b1, b4), 0), X, 2, 4, 5); /* (a2 + a3)(b2 + b3) (X^7 + X^6 + X^4 + X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a2, a3), mzd_sum(t2, 2, b2, b3), 0), X, 4, 7, 6, 4, 3); /* (a3 + a4 + a5)(b3 + b4 + b5) (X^8 + X^6 + X^4 + X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 3, a3, a4, a5), mzd_sum(t2, 3, b3, b4, b5), 0), X, 4, 8, 6, 4, 3); /* (a0 + a1 + a2)(b0 + b1 + b2) (X^7 + X^2) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 3, a0, a1, a2), mzd_sum(t2, 3, b0, b1, b2), 0), X, 2, 7, 2); /* (a0 + a3 + a5)(b0 + b3 + b5) (X^7 + X^5) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 3, a0, a3, a5), mzd_sum(t2, 3, b0, b3, b5), 0), X, 2, 7, 5); /* (a0 + a2 + a5)(b0 + b2 + b5) (X^6 + X^5 + X^4 + X^3) */ ; mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 3, a0, a2, a5), mzd_sum(t2, 3, b0, b2, b5), 0), X, 4, 6, 5, 4, 3); /* (a0 + a2 + a3 + a5)(b0 + b2 + b3 + b5) (X^7 + X^5 + X^4 + X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 4, a0, a2, a3, a5), mzd_sum(t2, 4, b0, b2, b3, b5), 0), X, 4, 7, 5, 4, 3); /* (a0 + a1 + a3 + a4)(b0 + b1 + b3 + b4) (X^6 + X^4) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 4, a0, a1, a3, a4), mzd_sum(t2, 4, b0, b1, b3, b4), 0), X, 2, 6, 4); /* (a0 + a1 + a2 + a3 + a4 + a5)(b0 + b1 + b2 + b3 + b4 + b5) X^6 */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 6, a0, a1, a2, a3, a4, a5), mzd_sum(t2, 6, b0, b1, b2, b3, b4, b5), 0), X, 1, 6); mzd_free(t0); mzd_free(t1); mzd_free(t2); return C; } mzd_slice_t *_mzd_slice_mul_karatsuba7(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { /* using three temporaries */ if (C == NULL) C = mzd_slice_init(A->finite_field, A->nrows, B->ncols); const gf2e *ff = 
A->finite_field; const mzd_t *a0 = A->x[0]; const mzd_t *a1 = A->x[1]; const mzd_t *a2 = A->x[2]; const mzd_t *a3 = A->x[3]; const mzd_t *a4 = A->x[4]; const mzd_t *a5 = A->x[5]; const mzd_t *a6 = A->x[6]; const mzd_t *b0 = B->x[0]; const mzd_t *b1 = B->x[1]; const mzd_t *b2 = B->x[2]; const mzd_t *b3 = B->x[3]; const mzd_t *b4 = B->x[4]; const mzd_t *b5 = B->x[5]; const mzd_t *b6 = B->x[6]; mzd_t **X = C->x; mzd_t *t0 = mzd_init(a0->nrows, b0->ncols); mzd_t *t1 = mzd_init(a0->nrows, a1->ncols); mzd_t *t2 = mzd_init(b0->nrows, b1->ncols); /* (a0 + a1 + a2 + a3 + a4 + a5 + a6)(b0 + b1 + b2 + b3 + b4 + b5 + b6)(X^7 + X^6 + X^5) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 7, a0, a1, a2, a3, a4, a5, a6), mzd_sum(t2, 7, b0, b1, b2, b3, b4, b5, b6), 0), X, 3, 7, 6, 5); /* (a1 + a2 + a3 + a5 + a6)(b1 + b2 + b3 + b5 + b6)(X^9 + X^6) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 5, a1, a2, a3, a5, a6), mzd_sum(t2, 5, b1, b2, b3, b5, b6), 0), X, 2, 9, 6); /* (a0 + a1 + a3 + a4 + a5)(b0 + b1 + b3 + b4 + b5)(X^6 + X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 5, a0, a1, a3, a4, a5), mzd_sum(t2, 5, b0, b1, b3, b4, b5), 0), X, 2, 6, 3); /* (a0 + a2 + a3 + a4 + a6)(b0 + b2 + b3 + b4 + b6)(X^9 + X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 5, a0, a2, a3, a4, a6), mzd_sum(t2, 5, b0, b2, b3, b4, b6), 0), X, 2, 9, 3); /* (a0 + a2 + a3 + a5 + a6)(b0 + b2 + b3 + b5 + b6)(X^7 + X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 5, a0, a2, a3, a5, a6), mzd_sum(t2, 5, b0, b2, b3, b5, b6), 0), X, 2, 7, 3); /* (a0 + a1 + a3 + a4 + a6)(b0 + b1 + b3 + b4 + b6)(X^9 + X^5) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 5, a0, a1, a3, a4, a6), mzd_sum(t2, 5, b0, b1, b3, b4, b6), 0), X, 2, 9, 5); /* (a1 + a2 + a4 + a5)(b1 + b2 + b4 + b5)(X^9 + X^7 + X^5 + X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 4, a1, a2, a4, a5), mzd_sum(t2, 4, b1, b2, b4, b5), 0), X, 4, 9, 7, 5, 3); /* (a0 + a1)(b0 + b1)(X^9 + X^7 + X^3 + X) */ 
mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a0, a1), mzd_sum(t2, 2, b0, b1), 0), X, 4, 9, 7, 3, 1); /* (a0 + a2)(b0 + b2)(X^9 + X^6 + X^5 + X^2) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a0, a2), mzd_sum(t2, 2, b0, b2), 0), X, 4, 9, 6, 5, 2); /* (a0 + a4)(b0 + b4)(X^7 + X^4) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a0, a4), mzd_sum(t2, 2, b0, b4), 0), X, 2, 7, 4); /* (a1 + a3)(b1 + b3)(X^7 + X^6 + X^4 + X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a1, a3), mzd_sum(t2, 2, b1, b3), 0), X, 4, 7, 6, 4, 3); /* (a2 + a6)(b2 + b6)(X^8 + X^5) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a2, a6), mzd_sum(t2, 2, b2, b6), 0), X, 2, 8, 5); /* (a3 + a5)(b3 + b5)(X^9 + X^8 + X^6 + X^5) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a3, a5), mzd_sum(t2, 2, b3, b5), 0), X, 4, 9, 8, 6, 5); /* (a4 + a6)(b4 + b6)(X^10 + X^7 + X^6 + X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a4, a6), mzd_sum(t2, 2, b4, b6), 0), X, 4, 10, 7, 6, 3); /* (a5 + a6)(b5 + b6)(X^11 + X^9 + X^5 + X^3) */ mzd_add_to_all_modred(ff, mzd_mul(t0, mzd_sum(t1, 2, a5, a6), mzd_sum(t2, 2, b5, b6), 0), X, 4, 11, 9, 5, 3); /* a0b0(X^6 + X^5 + X^4 + X^2 + X + 1) */ mzd_add_to_all_modred(ff, mzd_mul(t0, a0, b0, 0), X, 6, 6, 5, 4, 2, 1, 0); /* a1b1(X^5 + X^4 + X^2 + X) */ mzd_add_to_all_modred(ff, mzd_mul(t0, a1, b1, 0), X, 4, 5, 4, 2, 1); /* a2b2(X^8 + X^7 + X^6 + X^4 + X^3 + X^2) */ mzd_add_to_all_modred(ff, mzd_mul(t0, a2, b2, 0), X, 6, 8, 7, 6, 4, 3, 2); /* a3b3(X^8 + X^7 + X^5 + X^4) */ mzd_add_to_all_modred(ff, mzd_mul(t0, a3, b3, 0), X, 4, 8, 7, 5, 4); /* a4b4(X^10 + X^9 + X^8 + X^6 + X^5 + X^4)*/ mzd_add_to_all_modred(ff, mzd_mul(t0, a4, b4, 0), X, 6, 10, 9, 8, 6, 5, 4);; /* a5b5(X^11 + X^10 + X^8 + X^7) */ mzd_add_to_all_modred(ff, mzd_mul(t0, a5, b5, 0), X, 4, 11, 10, 8, 7); /* a6b6(X^12 + X^11 + X^10 + X^8 + X^7 + X^6)*/ mzd_add_to_all_modred(ff, mzd_mul(t0, a6, b6, 0), X, 6, 12, 11, 10, 8, 7, 6); mzd_free(t0); 
mzd_free(t1); mzd_free(t2); return C; } mzd_slice_t *_mzd_slice_mul_karatsuba8(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { /** 8 + 7 temporaries **/ /** * \todo reduce memory requirements by writing formula explicitly **/ if (C == NULL) C = mzd_slice_init(A->finite_field, A->nrows, B->ncols); const word minpoly = A->finite_field->minpoly; C = _mzd_slice_adapt_depth(C,15); const mzd_t *a0[4] = {A->x[0],A->x[1],A->x[2],A->x[3]}; const mzd_t *a1[4] = {A->x[4],A->x[5],A->x[6],A->x[7]}; const mzd_t *b0[4] = {B->x[0],B->x[1],B->x[2],B->x[3]}; const mzd_t *b1[4] = {B->x[4],B->x[5],B->x[6],B->x[7]}; mzd_t *X[3][7] = { {C->x[ 0],C->x[ 1],C->x[ 2],C->x[ 3],C->x[ 4],C->x[ 5],C->x[ 6]}, {C->x[ 4],C->x[ 5],C->x[ 6],C->x[ 7],C->x[ 8],C->x[ 9],C->x[10]}, {C->x[ 8],C->x[ 9],C->x[10],C->x[11],C->x[12],C->x[13],C->x[14]} }; mzd_t *t0[7]; mzd_t *t1[4]; t0[0] = mzd_init(A->nrows, A->ncols); t0[1] = mzd_init(A->nrows, A->ncols); t0[2] = mzd_init(A->nrows, A->ncols); t0[3] = mzd_init(A->nrows, A->ncols); t1[0] = mzd_init(B->nrows, B->ncols); t1[1] = mzd_init(B->nrows, B->ncols); t1[2] = mzd_init(B->nrows, B->ncols); t1[3] = mzd_init(B->nrows, B->ncols); _poly_add(t0, a0, a1, 4); _poly_add(t1, b0, b1, 4); _poly_addmul4(X[1], (const mzd_t**)t0, (const mzd_t**)t1); mzd_free(t0[0]); mzd_free(t0[1]); mzd_free(t0[2]); mzd_free(t0[3]); mzd_free(t1[0]); mzd_free(t1[1]); mzd_free(t1[2]); mzd_free(t1[3]); t0[0] = mzd_init(C->x[0]->nrows, B->x[0]->ncols); t0[1] = mzd_init(C->x[0]->nrows, B->x[0]->ncols); t0[2] = mzd_init(C->x[0]->nrows, B->x[0]->ncols); t0[3] = mzd_init(C->x[0]->nrows, B->x[0]->ncols); t0[4] = mzd_init(C->x[0]->nrows, B->x[0]->ncols); t0[5] = mzd_init(C->x[0]->nrows, B->x[0]->ncols); t0[6] = mzd_init(C->x[0]->nrows, B->x[0]->ncols); _poly_addmul4(t0, a0, b0); _poly_add(X[0], (const mzd_t**)X[0], (const mzd_t**)t0, 7); _poly_add(X[1], (const mzd_t**)X[1], (const mzd_t**)t0, 7); mzd_set_ui(t0[0], 0); mzd_set_ui(t0[1], 0); mzd_set_ui(t0[2], 0); mzd_set_ui(t0[3], 0); 
mzd_set_ui(t0[4], 0); mzd_set_ui(t0[5], 0); mzd_set_ui(t0[6], 0); _poly_addmul4(t0, a1, b1); _poly_add(X[1], (const mzd_t**)X[1], (const mzd_t**)t0, 7); _poly_add(X[2], (const mzd_t**)X[2], (const mzd_t**)t0, 7); mzd_free(t0[0]); mzd_free(t0[1]); mzd_free(t0[2]); mzd_free(t0[3]); mzd_free(t0[4]); mzd_free(t0[5]); mzd_free(t0[6]); for(unsigned int i=2*8-2; i>= 8; i--) for(unsigned int j=0; j<8; j++) if (minpoly & 1<x[i-8+j], C->x[i-8+j], C->x[i]); _mzd_slice_adapt_depth(C,8); return C; } libm4rie-20130416/src/mzd_slice.h000066400000000000000000000603221212302364300163730ustar00rootroot00000000000000/** * \file mzd_slice.h * * \brief Matrices using a bitsliced representation. * * Matrices over \GF2E can be represented as polynomials with matrix * coefficients where the matrices are in \GF2. * * In this file, matrices over \GF2E are implemented as \e slices of * matrices over \GF2 where each slice holds the coefficients of one * degree when viewing elements of \GF2E as polynomials over \GF2. * * \author Martin Albrecht */ #ifndef M4RIE_MZD_SLICE #define M4RIE_MZD_SLICE /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010,2011 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include #include #include /** * \brief Dense matrices over \GF2E represented as slices of matrices over \GF2. * * This is one of two fundamental data types of this library, the * other being mzed_t. 
For large matrices (\f$m \times n \times e > L2\f$) * it is advisable to use this data type because multiplication * is faster in this representation. Hence, compared to mzed_t one * saves the time to convert betwen representations and - more * importantly - memory. * * \ingroup Definitions */ typedef struct { mzd_t *x[16]; /**< mzd_slice_t::x[e][i,j] is the \e-th bit of the entry A[i,j]. */ rci_t nrows; /**< Number of rows. */ rci_t ncols; /**< Number of columns. */ unsigned int depth; /**< Number of slices * * \note This value may be greater than finite_field->degree in some situations */ const gf2e *finite_field; /**finite_field = ff; A->nrows = m; A->ncols = n; A->depth = ff->degree; for(int i=0; idepth; i++) A->x[i] = mzd_init(m,n); return A; } /** * \brief Return diagonal matrix with value on the diagonal. * * If the matrix is not square then the largest possible square * submatrix is used. * * \param A Matrix. * \param value Finite Field element. * * \ingroup Assignment */ void mzd_slice_set_ui(mzd_slice_t *A, word value); /** * \brief Extend or truncate the depth of A to depth new_depth. * * We may think of mzd_slice_t as polynomials over matrices over * \GF2. This function then truncates/extends these polynomials to * degree new_depth-1. In case of extension, all newly created * coefficients are zero, hence the mathematical content of A is not * changed. In case of truncation higher degree terms are simply * deleted and A's mathematical content modified. * * \param A Matrix, modifed in place. * \param new_depth Integer >= mzd_slice_t::finite_field::degree. 
*/ static inline mzd_slice_t *_mzd_slice_adapt_depth(mzd_slice_t *A, const unsigned int new_depth) { assert(A->finite_field->degree <= new_depth); if (new_depth < A->depth) { for(unsigned int i=new_depth; idepth; i++) { mzd_free(A->x[i]); A->x[i] = NULL; } } else { for(unsigned int i=A->depth; ix[i] = mzd_init(A->nrows,A->ncols); } } A->depth = new_depth; return A; } /** * \brief Free a matrix created with mzd_slice_init(). * * \param A Matrix. * * \ingroup Constructions */ static inline void mzd_slice_free(mzd_slice_t *A) { for(int i=0; idepth; i++) mzd_free(A->x[i]); #if __M4RI_USE_MM_MALLOC _mm_free(A); #else free(A); #endif } /** * \brief Concatenate B to A and write the result to C. * * That is, \verbatim [ A ], [ B ] -> [ A B ] = C \endverbatim * The inputs are not modified but a new matrix is created. * * \param C Matrix, may be NULL for automatic creation. * \param A Matrix. * \param B Matrix. * * \note This is sometimes called augment. * * \ingroup Constructions */ static inline mzd_slice_t *mzd_slice_concat(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { if(C == NULL) C = mzd_slice_init(A->finite_field, A->nrows, A->ncols + B->ncols); for(int i=0; idepth; i++) { mzd_concat(C->x[i], A->x[i], B->x[i]); } return C; } /** * \brief Stack A on top of B and write the result to C. * * That is, \verbatim [ A ], [ B ] -> [ A ] = C [ B ] \endverbatim * The inputs are not modified but a new matrix is created. * * \param C Matrix, may be NULL for automatic creation * \param A Matrix * \param B Matrix * * \ingroup Constructions */ static inline mzd_slice_t *mzd_slice_stack(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { if(C == NULL) C = mzd_slice_init(A->finite_field, A->nrows + B->nrows, A->ncols); for(int i=0; idepth; i++) { mzd_stack(C->x[i], A->x[i], B->x[i]); } return C; } /** * \brief Copy a submatrix. * * \param S Preallocated space for submatrix, may be NULL for automatic creation. 
* \param A Matrix * \param lowr start rows * \param lowc start column * \param highr stop row (this row is \em not included) * \param highc stop column (this column is \em not included) * * \ingroup Constructions */ static inline mzd_slice_t *mzd_slice_submatrix(mzd_slice_t *S, const mzd_slice_t *A, const size_t lowr, const size_t lowc, const size_t highr, const size_t highc) { if(S==NULL) S = mzd_slice_init(A->finite_field, highr - lowr, highc - lowc); for(int i=0; idepth; i++) { mzd_submatrix(S->x[i], A->x[i], lowr, lowc, highr, highc); } return S; } /** * \brief Create a window/view into the matrix M. * * A matrix window for M is a meta structure on the matrix M. It is * setup to point into the matrix so M \em must \em not be freed while the * matrix window is used. * * This function puts the restriction on the provided parameters that * all parameters must be within range for M which is not currently * enforced. * * Use mzd_slice_free_window() to free the window. * * \param A Matrix * \param lowr Starting row (inclusive) * \param lowc Starting column (inclusive) * \param highr End row (exclusive) * \param highc End column (exclusive) * * \ingroup Constructions */ static inline mzd_slice_t *mzd_slice_init_window(const mzd_slice_t *A, const size_t lowr, const size_t lowc, const size_t highr, const size_t highc) { mzd_slice_t *B = (mzd_slice_t *)m4ri_mm_malloc(sizeof(mzd_slice_t)); B->finite_field = A->finite_field; B->depth = A->depth; B->nrows = highr - lowr; B->ncols = highc - lowc; for(int i=0; idepth; i++) { B->x[i] = mzd_init_window(A->x[i], lowr, lowc, highr, highc); } return B; } /** * \brief Free a matrix window created with mzd_slice_init_window(). * * \param A Matrix * * \ingroup Constructions */ static inline void mzd_slice_free_window(mzd_slice_t *A) { for(int i=0; idepth; i++) { mzd_free_window(A->x[i]); } m4ri_mm_free(A); } /** * \brief \f$ C = A + B\f$. * * \param C Preallocated sum matrix, may be NULL for automatic creation. 
* \param A Matrix * \param B Matrix * * \ingroup Addition */ static inline mzd_slice_t *_mzd_slice_add(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { _poly_add(C->x, (const mzd_t**)A->x, (const mzd_t**)B->x, A->depth); return C; } /** * \brief \f$ C = A + B\f$. * * C is also returned. If C is NULL then a new matrix is created which * must be freed by mzd_slice_free(). * * \param C Preallocated sum matrix, may be NULL for automatic creation. * \param A Matrix * \param B Matrix * * \ingroup Addition */ static inline mzd_slice_t *mzd_slice_add(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { if ( (A->finite_field != B->finite_field) | (A->nrows != B->nrows) | (A->ncols != B->ncols) ) m4ri_die("mzd_slice_add: input matrices A (%d x %d) and B (%d x %d) do not match.\n",A->nrows,A->ncols, B->nrows,B->ncols); if(C == NULL) mzd_slice_init(A->finite_field, A->nrows, A->ncols); else if ( (A->finite_field != C->finite_field) | (A->nrows != C->nrows) | (A->ncols != C->ncols) ) m4ri_die("mzd_slice_add: input matrix A (%d x %d) and output matrix (%d x %d) do not match.\n",A->nrows,A->ncols, C->nrows, C->ncols); return _mzd_slice_add(C,A,B); } /** * \brief \f$ C = A + B\f$. * * C is also returned. If C is NULL then a new matrix is created which * must be freed by mzd_slice_free(). * * \param C Preallocated sum matrix, may be NULL for automatic creation. * \param A Matrix * \param B Matrix * * \ingroup Addition */ #define mzd_slice_sub mzd_slice_add /** * \brief \f$ C = A + B\f$. * * \param C Preallocated sum matrix, may be NULL for automatic creation. * \param A Matrix * \param B Matrix * * \ingroup Addition */ #define _mzd_slice_sub _mzd_slice_add /** * \brief \f$ C = A \cdot B \f$ using quadratic polynomial multiplication with matrix coefficients. * * \param C Preallocated return matrix, may be NULL for automatic creation. * \param A Input matrix A. * \param B Input matrix B. 
*
 * \ingroup Multiplication
 */

mzd_slice_t *_mzd_slice_mul_naive(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B);

/**
 * \brief \f$ C = A \cdot B \f$ over \GF4 using 3 multiplications over \GF2 and 2 temporary \GF2 matrices.
 *
 * \param C Preallocated return matrix, may be NULL for automatic creation.
 * \param A Input matrix A.
 * \param B Input matrix B.
 *
 * \sa _mzd_slice_mul_karatsuba()
 *
 * \ingroup Multiplication
 */

mzd_slice_t *_mzd_slice_mul_karatsuba2(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B);

/**
 * \brief \f$ C = A \cdot B \f$ over \GF8 using 6 multiplications over \GF2 and 3 temporary \GF2 matrices.
 *
 * The formula was taken from Peter L. Montgomery. "Five, Six, and
 * Seven-Term Karatsuba-Like Formulae" in IEEE TRANSACTIONS ON
 * COMPUTERS, VOL. 54, NO. 3, MARCH 2005.
 *
 * \param C Preallocated return matrix, may be NULL for automatic creation.
 * \param A Input matrix A.
 * \param B Input matrix B.
 *
 * \sa _mzd_slice_mul_karatsuba()
 *
 * \ingroup Multiplication
 */

mzd_slice_t *_mzd_slice_mul_karatsuba3(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B);

/**
 * \brief \f$ C = A \cdot B \f$ over \GF16 using 9 multiplications over \GF2 and 3 temporary \GF2 matrices.
 *
 * \param C Preallocated return matrix, may be NULL for automatic creation.
 * \param A Input matrix A.
 * \param B Input matrix B.
 *
 * \sa _mzd_slice_mul_karatsuba()
 *
 * \ingroup Multiplication
 */

mzd_slice_t *_mzd_slice_mul_karatsuba4(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B);

/**
 * \brief \f$ C = A \cdot B \f$ over \GF32 using 13 multiplications over \GF2 and 3 temporary \GF2 matrices.
 *
 * The formula was taken from Peter L. Montgomery. "Five, Six, and
 * Seven-Term Karatsuba-Like Formulae" in IEEE TRANSACTIONS ON
 * COMPUTERS, VOL. 54, NO. 3, MARCH 2005.
 *
 * \param C Preallocated return matrix, may be NULL for automatic creation.
 * \param A Input matrix A.
 * \param B Input matrix B.
*
 * \sa _mzd_slice_mul_karatsuba()
 *
 * \ingroup Multiplication
 */

mzd_slice_t *_mzd_slice_mul_karatsuba5(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B);

/**
 * \brief \f$ C = A \cdot B \f$ over \GF64 using 17 multiplications over \GF2 and 3 temporary \GF2 matrices.
 *
 * The formula was taken from Peter L. Montgomery. "Five, Six, and
 * Seven-Term Karatsuba-Like Formulae" in IEEE TRANSACTIONS ON
 * COMPUTERS, VOL. 54, NO. 3, MARCH 2005.
 *
 * \param C Preallocated return matrix, may be NULL for automatic creation.
 * \param A Input matrix A.
 * \param B Input matrix B.
 *
 * \sa _mzd_slice_mul_karatsuba()
 *
 * \ingroup Multiplication
 */

mzd_slice_t *_mzd_slice_mul_karatsuba6(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B);

/**
 * \brief \f$ C = A \cdot B \f$ over \GF128 using 22 multiplications over \GF2 and 3 temporary \GF2 matrices.
 *
 * The formula was taken from Peter L. Montgomery. "Five, Six, and
 * Seven-Term Karatsuba-Like Formulae" in IEEE TRANSACTIONS ON
 * COMPUTERS, VOL. 54, NO. 3, MARCH 2005.
 *
 * \param C Preallocated return matrix, may be NULL for automatic creation.
 * \param A Input matrix A.
 * \param B Input matrix B.
 *
 * \sa _mzd_slice_mul_karatsuba()
 *
 * \ingroup Multiplication
 */

mzd_slice_t *_mzd_slice_mul_karatsuba7(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B);

/**
 * \brief \f$ C = A \cdot B \f$ over \GF256 using 27 multiplications over \GF2 and 15 temporary \GF2 matrices.
 *
 * \param C Preallocated return matrix, may be NULL for automatic creation.
 * \param A Input matrix A.
 * \param B Input matrix B.
 *
 * \sa _mzd_slice_mul_karatsuba()
 *
 * \ingroup Multiplication
 */

mzd_slice_t *_mzd_slice_mul_karatsuba8(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B);

/**
 * \brief \f$ C = C + A \cdot B \f$ using Karatsuba multiplication of polynomials over matrices over \GF2.
 *
 * This function reduces matrix multiplication over \GF2E to matrix
 * multiplication over \GF2.
 *
 * As an example consider \f$ \mathbb{F}_4 \f$.
The minimal polynomial is * \f$ x^2 + x + 1 \f$. The matrix A can be represented as A0*x + A1 and the matrix B * can be represented as B0*x + B1. Their product C is * \f[ A0 \cdot B0 \cdot x^2 + (A0 \cdot B1 + A1 \cdot B0) \cdot x + A1*B1. * \f] * Reduction modulo x^2 + x + 1 gives * \f[ (A0 \cdot B0 + A0 \cdot B1 + A1 \cdot B0) \cdot x + A1 \cdot B1 + A0 \cdot B0. * \f] * This can be re-written as * \f[ ((A0 + A1) \cdot (B0 + B1) + A1 \cdot B1) \cdot x + A1 \cdot B1 + A0 \cdot B0 * \f] * and thus this multiplication costs 3 matrix multiplications over * \GF2 and 4 matrix additions over \GF2. * * This technique was proposed in Tomas J. Boothby and Robert * W. Bradshaw; Bitslicing and the Method of Four Russians Over Larger * Finite Fields; 2009; http://arxiv.org/abs/0901.1413 * * \param C Preallocated return matrix, may be NULL for automatic creation. * \param A Input matrix A. * \param B Input matrix B. * * \sa mzed_mul() mzd_slice_mul() mzd_slice_mul_karatsuba() * * \ingroup Multiplication */ static inline mzd_slice_t *_mzd_slice_mul_karatsuba(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { switch(A->finite_field->degree) { case 2: C = _mzd_slice_mul_karatsuba2(C, A, B); break; case 3: C = _mzd_slice_mul_karatsuba3(C, A, B); break; case 4: C = _mzd_slice_mul_karatsuba4(C, A, B); break; case 5: C = _mzd_slice_mul_karatsuba5(C, A, B); break; case 6: C = _mzd_slice_mul_karatsuba6(C, A, B); break; case 7: C = _mzd_slice_mul_karatsuba7(C, A, B); break; case 8: C = _mzd_slice_mul_karatsuba8(C, A, B); break; default: C = _mzd_slice_mul_naive(C, A, B); break; } return C; } /** * \brief \f$ C = A \cdot B \f$ using Karatsuba multiplication of polynomials over matrices over \GF2. * * \param C Preallocated return matrix, may be NULL for automatic creation. * \param A Input matrix A. * \param B Input matrix B. 
* * \sa _mzd_slice_mul_karatsuba() * * \ingroup Multiplication */ static inline mzd_slice_t *mzd_slice_mul_karatsuba(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { if (A->ncols != B->nrows || A->finite_field != B->finite_field) m4ri_die("mzd_slice_mul_karatsuba: rows, columns and fields must match.\n"); if (C != NULL) { if (C->finite_field != A->finite_field || C->nrows != A->nrows || C->ncols != B->ncols) m4ri_die("mzd_slice_mul_karatsuba: rows and columns of returned matrix must match.\n"); mzd_slice_set_ui(C,0); } return _mzd_slice_mul_karatsuba(C, A, B); } /** * \brief \f$ C = C + A \cdot B\f$ using Karatsuba multiplication of polynomials over matrices over \GF2. * * \param C Preallocated return matrix. * \param A Input matrix A. * \param B Input matrix B. * * \sa _mzd_slice_mul_karatsuba() * * \ingroup Multiplication */ static inline mzd_slice_t *mzd_slice_addmul_karatsuba(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { assert(C != NULL); if (A->ncols != B->nrows || A->finite_field != B->finite_field) m4ri_die("mzd_slice_addmul_karatsuba: rows, columns and fields must match.\n"); if (C->finite_field != A->finite_field || C->nrows != A->nrows || C->ncols != B->ncols) m4ri_die("mzd_slice_addmul_karatsuba: rows and columns of returned matrix must match.\n"); return _mzd_slice_mul_karatsuba(C, A, B); } /** * \brief \f$ C = a \cdot B \f$. * * \param C Preallocated product matrix or NULL. * \param a finite field element. * \param B Input matrix B. * * \ingroup Multiplication */ mzd_slice_t *mzd_slice_mul_scalar(mzd_slice_t *C, const word a, const mzd_slice_t *B); /** * \brief \f$ C += a \cdot B \f$. * * \param C Preallocated product matrix. * \param a finite field element. * \param B Input matrix B. * * \ingroup Multiplication */ mzd_slice_t *mzd_slice_addmul_scalar(mzd_slice_t *C, const word a, const mzd_slice_t *B); /** * \brief \f$ C = A \cdot B \f$. * * \param C Preallocated return matrix, may be NULL for automatic creation. 
* \param A Input matrix A. * \param B Input matrix B. * * \sa _mzd_slice_mul_karatsuba() * * \ingroup Multiplication */ static inline mzd_slice_t *mzd_slice_mul(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { return mzd_slice_mul_karatsuba(C,A,B); } /** * \brief \f$ C = C + A \cdot B \f$. * * \param C Preallocated return matrix. * \param A Input matrix A. * \param B Input matrix B. * * \sa _mzd_slice_mul_karatsuba() * * \ingroup Multiplication */ static inline mzd_slice_t *mzd_slice_addmul(mzd_slice_t *C, const mzd_slice_t *A, const mzd_slice_t *B) { return mzd_slice_addmul_karatsuba(C,A,B); } /** * \brief Fill matrix A with random elements. * * \param A Matrix * * \todo Allow the user to provide a RNG callback. * * \ingroup Assignment */ static inline void mzd_slice_randomize(mzd_slice_t *A) { for(int i=0; idepth; i++) mzd_randomize(A->x[i]); } /** * \brief Copy matrix A to B. * * \param B May be NULL for automatic creation. * \param A Source matrix. * * \ingroup Assignment */ static inline mzd_slice_t *mzd_slice_copy(mzd_slice_t *B, const mzd_slice_t *A) { if(B == NULL) B = mzd_slice_init(A->finite_field, A->nrows, A->ncols); for(int i=0; idepth; i++) { mzd_copy(B->x[i],A->x[i]); } return B; } /** * \brief Get the element at position (row,col) from the matrix A. * * \param A Source matrix. * \param row Starting row. * \param col Starting column. * * \todo This function is considerably slower than it needs to be. * * \ingroup Assignment */ static inline word mzd_slice_read_elem(const mzd_slice_t *A, const rci_t row, const rci_t col) { word ret = 0; for(int i=0; idepth; i++) { ret |= mzd_read_bit(A->x[i], row, col)<depth; i++) { __mzd_xor_bits(A->x[i], row, col, 1, elem&1); elem=elem>>1; } } /** * \brief Write the element elem to the position (row,col) in the matrix A. * * \param A Target matrix. * \param row Starting row. * \param col Starting column. * \param elem finite field element. 
* * \todo This function is considerably slower than it needs to be. * * \ingroup Assignment */ static inline void mzd_slice_write_elem(mzd_slice_t *A, const rci_t row, const rci_t col, word elem) { for(int i=0; idepth; i++) { mzd_write_bit(A->x[i], row, col, elem&1); elem=elem>>1; } } /** * \brief Return -1,0,1 if if A < B, A == B or A > B respectively. * * \param A Matrix. * \param B Matrix. * * \note This comparison is not well defined (except for !=0) * mathematically and relatively arbitrary since elements of GF(2^k) * don't have an ordering. * * \ingroup Comparison */ static inline int mzd_slice_cmp(mzd_slice_t *A, mzd_slice_t *B) { int r = 0; if ((A->finite_field != B->finite_field) | (A->depth != B->depth) ) return -1; for(int i=0; idepth; i++) r |= mzd_cmp(A->x[i],B->x[i]); return r; } /** * \brief Zero test for matrix. * * \param A Input matrix. * * \ingroup Comparison */ static inline int mzd_slice_is_zero(const mzd_slice_t *A) { for(int i=0; idepth; i++) { if (!mzd_is_zero(A->x[i])) return 0; } return 1; } /** * \brief Swap the two rows rowa and rowb. * * \param A Matrix * \param rowa Row index. * \param rowb Row index. * * \ingroup RowOperations */ static inline void mzd_slice_row_swap(mzd_slice_t *A, const rci_t rowa, const rci_t rowb) { for(int i=0; idepth; i++) { mzd_row_swap(A->x[i], rowa, rowb); } } /** * \brief copy row j from A to row i from B. * * The offsets of A and B must match and the number of columns of A * must be less than or equal to the number of columns of B. * * \param B Target matrix. * \param i Target row index. * \param A Source matrix. * \param j Source row index. * * \ingroup RowOperations */ static inline void mzd_slice_copy_row(mzd_slice_t* B, size_t i, const mzd_slice_t* A, size_t j) { for(int ii=0; iidepth; ii++) mzd_copy_row(B->x[ii], i, A->x[ii], j); } /** * \brief Swap the two columns cola and colb. * * \param A Matrix. * \param cola Column index. * \param colb Column index. 
* * \ingroup RowOperations */ static inline void mzd_slice_col_swap(mzd_slice_t *A, const rci_t cola, const rci_t colb) { for(int i=0; idepth; i++) mzd_col_swap(A->x[i], cola, colb); } /** * \brief Swap the two columns cola and colb but only between start_row and stop_row. * * \param A Matrix. * \param cola Column index. * \param colb Column index. * \param start_row Row index. * \param stop_row Row index (exclusive). */ static inline void mzd_slice_col_swap_in_rows(mzd_slice_t *A, const rci_t cola, const rci_t colb, const rci_t start_row, rci_t stop_row) { for(unsigned int e=0; e < A->finite_field->degree; e++) { mzd_col_swap_in_rows(A->x[e], cola, colb, start_row, stop_row); }; } /** * \brief Add the rows sourcerow and destrow and stores the total in * the row destrow. * * \param A Matrix * \param sourcerow Index of source row * \param destrow Index of target row * * \note this can be done much faster with mzd_combine. * * \ingroup RowOperations */ static inline void mzd_slice_row_add(mzd_slice_t *A, const rci_t sourcerow, const rci_t destrow) { for(int i=0; idepth; i++) mzd_row_add(A->x[i], sourcerow, destrow); } /** * \brief Clear the given row, but only begins at the column coloffset. * * \param A Matrix * \param row Index of row * \param coloffset Column offset * * \ingroup RowOperations */ static inline void mzd_slice_row_clear_offset(mzd_slice_t *A, const rci_t row, const rci_t coloffset) { for(int i=0; idepth; i++) mzd_row_clear_offset(A->x[i], row, coloffset); } /** * \brief Print a matrix to stdout. * * \param A Matrix * * \ingroup StringConversions */ void mzd_slice_print(const mzd_slice_t *A); /** * \brief Move the submatrix L of rank r2 starting at column n1 to the left to column r1. 
* * \param A Matrix * \param r1 Integer < n1 * \param n1 Integer > r1 * \param r2 Integer <= A->ncols - n1 */ static inline void _mzd_slice_compress_l(mzd_slice_t *A, const rci_t r1, const rci_t n1, const rci_t r2) { for(int i=0; idepth; i++) _mzd_compress_l(A->x[i], r1, n1, r2); } #endif //M4RIE_MZD_SLICE libm4rie-20130416/src/mzd_slice_intro.inl000066400000000000000000000016171212302364300201430ustar00rootroot00000000000000#define matrix_t mzd_slice_t #define matrix_set_ui mzd_slice_set_ui #define matrix_write_elem mzd_slice_write_elem #define matrix_init_window mzd_slice_init_window #define matrix_free_window mzd_slice_free_window #define matrix_addmul mzd_slice_addmul #define matrix_apply_p_right mzd_slice_apply_p_right #define matrix_trsm_lower_left mzd_slice_trsm_lower_left #define _matrix_trsm_lower_left _mzd_slice_trsm_lower_left #define matrix_trsm_lower_left_naive mzd_slice_trsm_lower_left_naive #define matrix_trsm_lower_left_newton_john mzd_slice_trsm_lower_left_newton_john #define matrix_trsm_upper_left mzd_slice_trsm_upper_left #define _matrix_trsm_upper_left _mzd_slice_trsm_upper_left #define matrix_trsm_upper_left_naive mzd_slice_trsm_upper_left_naive #define matrix_trsm_upper_left_newton_john mzd_slice_trsm_upper_left_newton_john #define matrix_ple mzd_slice_ple #define matrix_pluq mzd_slice_pluq libm4rie-20130416/src/mzd_slice_outro.inl000066400000000000000000000007431212302364300201570ustar00rootroot00000000000000#undef matrix_t #undef matrix_set_ui #undef matrix_write_elem #undef matrix_init_window #undef matrix_free_window #undef matrix_addmul #undef matrix_apply_p_right #undef matrix_trsm_lower_left #undef _matrix_trsm_lower_left #undef matrix_trsm_lower_left_naive #undef matrix_trsm_lower_left_newton_john #undef matrix_trsm_upper_left #undef _matrix_trsm_upper_left #undef matrix_trsm_upper_left_naive #undef matrix_trsm_upper_left_newton_john #undef matrix_ple #undef matrix_pluq 
libm4rie-20130416/src/mzed.c000066400000000000000000000653141212302364300153620ustar00rootroot00000000000000/****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include #include "config.h" #include "mzed.h" #include "strassen.h" #include "mzd_slice.h" #include "conversion.h" mzed_t *mzed_init(const gf2e* k, rci_t m, rci_t n) { mzed_t *A = (mzed_t *)m4ri_mm_malloc(sizeof(mzed_t)); A->finite_field = k; A->w = gf2e_degree_to_w(A->finite_field); A->nrows = m; A->ncols = n; A->x = mzd_init(m, A->w*n); return A; } void mzed_free(mzed_t *A) { mzd_free(A->x); m4ri_mm_free(A); } void mzed_randomize(mzed_t *A) { unsigned int bitmask = (1<finite_field->degree)-1; for(rci_t r=0; rnrows; r++) { for(rci_t c=0; cncols; c++) { mzed_write_elem(A,r,c, random()&bitmask); } } } mzed_t *mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B) { if (A->nrows != B->nrows || A->ncols != B->ncols || A->finite_field != B->finite_field) { m4ri_die("mzed_add: rows, columns and fields must match.\n"); } if (C == NULL) { C = mzed_init(A->finite_field, A->nrows, A->ncols); } else if (C != A) { if (C->finite_field != A->finite_field || C->nrows != A->nrows || C->ncols != A->ncols) { m4ri_die("mzed_add: rows and columns of returned matrix must match.\n"); } } mzd_add(C->x, A->x, B->x); return C; } mzed_t *_mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B) { mzd_add(C->x, A->x, B->x); return C; } mzed_t 
*_mzed_mul_init(mzed_t *C, const mzed_t *A, const mzed_t *B, int clear) { if (A->ncols != B->nrows || A->finite_field != B->finite_field) { m4ri_die("mzed_mul: rows, columns and fields must match.\n"); } if (C == NULL) { C = mzed_init(A->finite_field, A->nrows, B->ncols); } else { if (C->finite_field != A->finite_field || C->nrows != A->nrows || C->ncols != B->ncols) { m4ri_die("mzed_mul: rows and columns of returned matrix must match.\n"); } if (clear) mzed_set_ui(C,0); } return C; } mzed_t *mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B) { C = _mzed_mul_init(C,A,B, TRUE); _mzed_mul(C, A, B); return C; } mzed_t *mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B) { C = _mzed_mul_init(C,A,B, FALSE); _mzed_addmul(C, A, B); return C; } mzed_t *_mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B) { if (A->nrows >= 512 && A->ncols >= 512 && B->ncols >= 512) return _mzed_mul_karatsuba(C, A, B); const rci_t cutoff = _mzed_strassen_cutoff(C, A, B); return _mzed_mul_strassen(C, A, B, cutoff); } mzed_t *_mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B) { assert((A->x->offset | B->x->offset | C->x->offset) == 0); if (A->nrows >= 512 && A->ncols >= 512 && B->ncols >= 512) return _mzed_mul_karatsuba(C, A, B); const rci_t cutoff = _mzed_strassen_cutoff(C, A, B); return _mzed_addmul_strassen(C, A, B, cutoff); } mzed_t *mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B) { C = _mzed_mul_init(C,A,B, TRUE); return _mzed_mul_naive(C, A, B); } mzed_t *mzed_addmul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B) { C = _mzed_mul_init(C,A,B, FALSE); return _mzed_mul_naive(C, A, B); } mzed_t *_mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B) { const gf2e* ff = C->finite_field; for (rci_t i=0; inrows; ++i) { for (rci_t j=0; jncols; ++j) { for (rci_t k=0; kncols; ++k) { mzed_add_elem(C, i, j, ff->mul(ff, mzed_read_elem(A,i, k), mzed_read_elem(B, k, j))); } } } return C; } mzed_t *mzed_mul_scalar(mzed_t *C, const word a, const mzed_t *B) { /** * The 
algorithm proceeds as follows: */ if(C == NULL) C = mzed_init(B->finite_field, B->nrows, B->ncols); const gf2e *ff = B->finite_field; /** * 0) If a direct approach would need less lookups we use that. */ if(ff->degree > 8 || B->nrows*B->ncols < 1<<17) { mzed_copy(C, B); for(rci_t i=0; inrows; i++) mzed_rescale_row(C, i, 0, a); return C; } /** * 1) We generate a lookup table of 16-bit wide entries */ const word mask_16 = (1<<16)-1; const word *mul = (const word*)gf2e_t16_init(B->finite_field, a); /** * 2) We use that lookup table to do 4 lookups per word */ for(rci_t i=0; inrows; i++) { word *c_row = C->x->rows[i]; const word *b_row = B->x->rows[i]; for(wi_t j=0; jx->width-1; j++) { const word tmp = b_row[j]; const word a0 = tmp & mask_16; const word a1 = tmp>>16 & mask_16; const word a2 = tmp>>32 & mask_16; const word a3 = tmp>>48 & mask_16; c_row[j] = mul[a3]<<48 | mul[a2]<<32 | mul[a1]<<16 | mul[a0]; } /* deal with rest */ const word tmp = b_row[B->x->width-1] & B->x->high_bitmask; const word a0 = tmp & mask_16; const word a1 = tmp>>16 & mask_16; const word a2 = tmp>>32 & mask_16; const word a3 = tmp>>48 & mask_16; c_row[C->x->width-1] &= ~B->x->high_bitmask; c_row[C->x->width-1] |= mul[a3]<<48 | mul[a2]<<32 | mul[a1]<<16 | mul[a0]; } gf2e_t16_free((word*)mul); return C; } mzed_t *mzed_copy(mzed_t *A, const mzed_t *B) { if (A == B) return A; if (A == NULL) A = mzed_init(B->finite_field, B->nrows, B->ncols); if (A->finite_field != B->finite_field || A->nrows != B->nrows || A->ncols != B->ncols) { m4ri_die("mzed_copy: target matrix has wrong dimensions or base field."); } mzd_copy(A->x, B->x); return A; } rci_t mzed_echelonize_naive(mzed_t *A, int full) { rci_t start_row,r,c,i,elim_start; word x = 0; rci_t nr = A->nrows; rci_t nc = A->ncols; const gf2e *ff = A->finite_field; start_row = 0; for(c=0; cx, r, start_row); if (full) elim_start = 0; else elim_start = start_row + 1; for(i=elim_start; ix, 0); if(!value) return; for(rci_t i=0; i< MIN(A->ncols,A->nrows); i++) 
{ mzed_write_elem(A, i, i, value); } } void mzed_print(const mzed_t *A) { char formatstr[10]; int width = (A->w/4); if (A->w%4) width += 1; sprintf(formatstr,"%%%dx",width); for (rci_t i=0; i < A->nrows; ++i) { printf("["); for (rci_t j=0; j < A->ncols; j++) { word tmp = mzed_read_elem(A,i,j); printf(formatstr,(int)tmp); if(jncols-1) printf(" "); } printf("]\n"); } } void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word x, rci_t start_col) { assert(A->ncols == B->ncols && A->finite_field == B->finite_field); assert(A->x->offset == B->x->offset); assert(start_col < A->ncols); const gf2e *ff = A->finite_field; if (x == 0) { return; } else if(x == 1) { mzed_add_row(A, ar, B, br, start_col); return; } const rci_t start = A->x->offset + A->w*start_col; const wi_t startblock = start/m4ri_radix; const word bitmask_end = __M4RI_LEFT_BITMASK((A->x->offset + A->x->ncols) % m4ri_radix); mzd_t *from_x = B->x; mzd_t *to_x = A->x; word *_f = from_x->rows[br]; word *_t = to_x->rows[ar]; wi_t j; register word __f = _f[startblock]>>(start%m4ri_radix); register word __t = _t[startblock]; if(A->w == 2) { switch( (start/2) % 32) { case 0: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 0; __f >>= 2; case 1: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 2; __f >>= 2; case 2: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 4; __f >>= 2; case 3: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 6; __f >>= 2; case 4: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 8; __f >>= 2; case 5: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<10; __f >>= 2; case 6: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<12; __f >>= 2; case 7: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<14; __f >>= 2; case 8: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<16; __f >>= 2; case 9: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<18; __f >>= 2; case 10: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<20; __f >>= 2; case 11: __t ^= 
ff->mul(ff, x, __f & 0x0000000000000003ULL)<<22; __f >>= 2; case 12: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<24; __f >>= 2; case 13: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<26; __f >>= 2; case 14: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<28; __f >>= 2; case 15: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<30; __f >>= 2; case 16: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<32; __f >>= 2; case 17: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<34; __f >>= 2; case 18: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<36; __f >>= 2; case 19: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<38; __f >>= 2; case 20: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<40; __f >>= 2; case 21: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<42; __f >>= 2; case 22: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<44; __f >>= 2; case 23: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<46; __f >>= 2; case 24: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<48; __f >>= 2; case 25: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<50; __f >>= 2; case 26: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<52; __f >>= 2; case 27: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<54; __f >>= 2; case 28: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<56; __f >>= 2; case 29: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<58; __f >>= 2; case 30: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<60; __f >>= 2; case 31: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<62; break; default: m4ri_die("impossible"); } if(to_x->width-startblock == 1) { _t[startblock] &= ~bitmask_end; _t[startblock] ^= __t & bitmask_end; return; } else { _t[startblock] = __t; } for(j=startblock+1; jwidth -1; j++) { __f = _f[j], __t = _t[j]; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 0; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 2; __f >>= 2; __t ^= ff->mul(ff, x, __f & 
0x0000000000000003ULL)<< 4; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 6; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 8; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<10; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<12; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<14; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<16; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<18; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<20; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<22; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<24; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<26; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<28; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<30; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<32; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<34; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<36; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<38; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<40; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<42; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<44; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<46; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<48; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<50; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<52; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<54; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<56; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<58; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<60; __f >>= 2; __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<62; _t[j] = __t; } 
switch((to_x->offset + to_x->ncols) % m4ri_radix) { case 0: _t[j] ^= ff->mul(ff, x, (_f[j] & 0xC000000000000000ULL)>>62)<<62; case 62: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x3000000000000000ULL)>>60)<<60; case 60: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0C00000000000000ULL)>>58)<<58; case 58: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0300000000000000ULL)>>56)<<56; case 56: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00C0000000000000ULL)>>54)<<54; case 54: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0030000000000000ULL)>>52)<<52; case 52: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000C000000000000ULL)>>50)<<50; case 50: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0003000000000000ULL)>>48)<<48; case 48: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000C00000000000ULL)>>46)<<46; case 46: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000300000000000ULL)>>44)<<44; case 44: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000C0000000000ULL)>>42)<<42; case 42: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000030000000000ULL)>>40)<<40; case 40: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000C000000000ULL)>>38)<<38; case 38: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000003000000000ULL)>>36)<<36; case 36: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000C00000000ULL)>>34)<<34; case 34: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000300000000ULL)>>32)<<32; case 32: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000C0000000ULL)>>30)<<30; case 30: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000030000000ULL)>>28)<<28; case 28: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000C000000ULL)>>26)<<26; case 26: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000003000000ULL)>>24)<<24; case 24: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000C00000ULL)>>22)<<22; case 22: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000300000ULL)>>20)<<20; case 20: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000000C0000ULL)>>18)<<18; case 18: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000030000ULL)>>16)<<16; case 16: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000000C000ULL)>>14)<<14; case 14: _t[j] ^= ff->mul(ff, x, (_f[j] & 
0x0000000000003000ULL)>>12)<<12; case 12: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000000C00ULL)>>10)<<10; case 10: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000000300ULL)>> 8)<< 8; case 8: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000000000C0ULL)>> 6)<< 6; case 6: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000000030ULL)>> 4)<< 4; case 4: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000000000CULL)>> 2)<< 2; case 2: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000000003ULL)>> 0)<< 0; }; } else if(A->w == 4) { switch( (start/4) % 16 ) { case 0: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<< 0; __f >>= 4; case 1: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<< 4; __f >>= 4; case 2: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<< 8; __f >>= 4; case 3: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<12; __f >>= 4; case 4: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<16; __f >>= 4; case 5: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<20; __f >>= 4; case 6: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<24; __f >>= 4; case 7: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<28; __f >>= 4; case 8: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<32; __f >>= 4; case 9: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<36; __f >>= 4; case 10: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<40; __f >>= 4; case 11: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<44; __f >>= 4; case 12: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<48; __f >>= 4; case 13: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<52; __f >>= 4; case 14: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<56; __f >>= 4; case 15: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<60; break; default: m4ri_die("impossible"); } if(to_x->width-startblock == 1) { _t[startblock] &= ~bitmask_end; _t[startblock] ^= __t & bitmask_end; return; } else { _t[startblock] = __t; } for(j=startblock+1; jwidth -1; j++) { __f = _f[j], __t = _t[j]; __t ^= ff->mul(ff, x, 
__f & 0x000000000000000FULL)<< 0; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<< 4; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<< 8; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<12; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<16; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<20; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<24; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<28; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<32; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<36; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<40; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<44; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<48; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<52; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<56; __f >>= 4; __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<60; _t[j] = __t; } switch((to_x->offset + to_x->ncols) % m4ri_radix) { case 0: _t[j] ^= ff->mul(ff, x, (_f[j] & 0xF000000000000000ULL)>>60)<<60; case 60: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0F00000000000000ULL)>>56)<<56; case 56: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00F0000000000000ULL)>>52)<<52; case 52: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000F000000000000ULL)>>48)<<48; case 48: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000F00000000000ULL)>>44)<<44; case 44: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000F0000000000ULL)>>40)<<40; case 40: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000F000000000ULL)>>36)<<36; case 36: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000F00000000ULL)>>32)<<32; case 32: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000F0000000ULL)>>28)<<28; case 28: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000F000000ULL)>>24)<<24; case 24: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000F00000ULL)>>20)<<20; case 20: _t[j] ^= ff->mul(ff, x, (_f[j] & 
0x00000000000F0000ULL)>>16)<<16; case 16: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000000F000ULL)>>12)<<12; case 12: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000000F00ULL)>> 8)<< 8; case 8: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000000000F0ULL)>> 4)<< 4; case 4: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000000000FULL)>> 0)<< 0; }; } else if (A->w == 8) { register word __t0 ,__t1, __f0, __f1; __f0 = _f[startblock]>>(start%m4ri_radix), __t0 = _t[startblock]; switch( (start/8) % 8 ) { case 0: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<< 0; __f0 >>= 8; case 1: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<< 8; __f0 >>= 8; case 2: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<16; __f0 >>= 8; case 3: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<24; __f0 >>= 8; case 4: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<32; __f0 >>= 8; case 5: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<40; __f0 >>= 8; case 6: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<48; __f0 >>= 8; case 7: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<56; break; default: m4ri_die("impossible"); } if(to_x->width-startblock == 1) { _t[startblock] &= ~bitmask_end; _t[startblock] ^= __t0 & bitmask_end; return; } else { _t[startblock] = __t0; } for(j=startblock+1; j+2 < to_x->width; j+=2) { __f0 = _f[j], __t0 = _t[j]; __f1 = _f[j+1], __t1 = _t[j+1]; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<< 0; __f0 >>= 8; __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<< 0; __f1 >>= 8; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<< 8; __f0 >>= 8; __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<< 8; __f1 >>= 8; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<16; __f0 >>= 8; __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<<16; __f1 >>= 8; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<24; __f0 >>= 8; __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<<24; __f1 >>= 8; __t0 ^= ff->mul(ff, x, __f0 & 
0x00000000000000FFULL)<<32; __f0 >>= 8; __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<<32; __f1 >>= 8; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<40; __f0 >>= 8; __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<<40; __f1 >>= 8; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<48; __f0 >>= 8; __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<<48; __f1 >>= 8; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<56; __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<<56; _t[j+0] = __t0; _t[j+1] = __t1; } for(; j < to_x->width-1; j++) { __f0 = _f[j], __t0 = _t[j]; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<< 0; __f0 >>= 8; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<< 8; __f0 >>= 8; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<16; __f0 >>= 8; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<24; __f0 >>= 8; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<32; __f0 >>= 8; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<40; __f0 >>= 8; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<48; __f0 >>= 8; __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<56; _t[j] = __t0; } switch((to_x->offset + to_x->ncols) % m4ri_radix) { case 0: _t[j] ^= ff->mul(ff, x, (_f[j] & 0xFF00000000000000ULL)>>56)<<56; case 56: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00FF000000000000ULL)>>48)<<48; case 48: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000FF0000000000ULL)>>40)<<40; case 40: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000FF00000000ULL)>>32)<<32; case 32: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000FF000000ULL)>>24)<<24; case 24: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000FF0000ULL)>>16)<<16; case 16: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000000FF00ULL)>> 8)<< 8; case 8: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000000000FFULL)>> 0)<< 0; }; } else if (A->w == 16) { mzd_t *from_x = B->x; mzd_t *to_x = A->x; word *_f = from_x->rows[br]; word *_t = to_x->rows[ar]; size_t j; register word __t, __f; __f = 
_f[startblock]>>(start%m4ri_radix), __t = _t[startblock]; switch( (start/16)%4 ) { case 0: __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<< 0; __f >>= 16; case 1: __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<16; __f >>= 16; case 2: __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<32; __f >>= 16; case 3: __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<48; break; default: m4ri_die("impossible"); } if(to_x->width-startblock == 1) { _t[startblock] &= ~bitmask_end; _t[startblock] ^= __t & bitmask_end; return; } else { _t[startblock] = __t; } for(j=startblock+1; j+4width; j+=4) { __f = _f[j], __t = _t[j]; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<< 0; __f >>= 16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<16; __f >>= 16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<32; __f >>= 16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<48; _t[j] = __t; __f = _f[j+1], __t = _t[j+1]; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<< 0; __f >>= 16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<16; __f >>= 16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<32; __f >>= 16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<48; _t[j+1] = __t; __f = _f[j+2], __t = _t[j+2]; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<< 0; __f >>= 16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<16; __f >>= 16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<32; __f >>= 16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<48; _t[j+2] = __t; __f = _f[j+3], __t = _t[j+3]; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<< 0; __f >>= 16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<16; __f >>= 16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<32; __f >>= 16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<48; _t[j+3] = __t; } for( ; jwidth-1; j++) { __f = _f[j], __t = _t[j]; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<< 0; __f >>= 16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<16; __f >>= 
16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<32; __f >>= 16; __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<48; _t[j] = __t; } switch((to_x->offset + to_x->ncols) % m4ri_radix) { case 0: _t[j] ^= ff->mul(ff, x, (_f[j] & 0xFFFF000000000000ULL)>>48)<<48; case 48: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000FFFF00000000ULL)>>32)<<32; case 32: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000FFFF0000ULL)>>16)<<16; case 16: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000000FFFFULL)>> 0)<< 0; }; } else { for(rci_t j=start_col; jncols; j++) { mzed_add_elem(A, ar, j, ff->mul(ff, x, mzed_read_elem(B, br, j))); } } } libm4rie-20130416/src/mzed.h000066400000000000000000001072041212302364300153620ustar00rootroot00000000000000/** * \file mzed.h * * \brief Dense matrices over \GF2E represented as packed matrices. * * This file implements the data type mzed_t. That is, matrices over * \GF2E in row major representation. * For example, let \f$ a = \sum a_i x_i / \f$ and \f$b = \sum b_i x_i / \f$ * be elements in \f$\mathbb{F}_{2^6}\f$ with minimal polynomial \f$f\f$. Then, the * \f$ 1 \times 2\f$ matrix [b a] would be stored as \verbatim [...| 0 0 b5 b4 b3 b2 b1 b0 | 0 0 a5 a4 a3 a2 a1 a0] \endverbatim * * Internally M4RI matrices are used to store bits with allows to * re-use existing M4RI methods (such as mzd_add) when implementing * functions for mzed_t. * * This data type is preferable when Newton-John tables ought be used * or when the matrix is small (\f$ m \times n \times e < L2\f$). * * \author Martin Albrecht */ #ifndef M4RIE_MZED_H #define M4RIE_MZED_H /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010,2011 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. 
* * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include #include #include /** * \brief Dense matrices over \GF2E represented as packed matrices. * * \ingroup Definitions */ typedef struct { mzd_t *x; /**< \f$m \times n\f$ matrices over \GF2E are represented as \f$m \times (en)\f$ matrices over \GF2. */ const gf2e *finite_field; /**< A finite field \GF2E. */ rci_t nrows; /**< Number of rows. */ rci_t ncols; /**< Number of columns. */ wi_t w; /**< The internal width of elements (must divide 64). */ } mzed_t; /** * \brief Create a new matrix of dimension m x n over ff * * Use mzed_free() to kill it. * * \param ff Finite field * \param m Number of rows * \param n Number of columns * * \ingroup Constructions */ mzed_t *mzed_init(const gf2e *ff, const rci_t m, const rci_t n); /** * \brief Free a matrix created with mzed_init(). * * \param A Matrix * * \ingroup Constructions */ void mzed_free(mzed_t *A); /** * \brief Concatenate B to A and write the result to C. * * That is, \verbatim [ A ], [ B ] -> [ A B ] = C \endverbatim * The inputs are not modified but a new matrix is created. * * \param C Matrix, may be NULL for automatic creation * \param A Matrix * \param B Matrix * * \note This is sometimes called augment. * * \ingroup Constructions */ static inline mzed_t *mzed_concat(mzed_t *C, const mzed_t *A, const mzed_t *B) { if(C==NULL) C = mzed_init(A->finite_field, A->nrows, A->ncols + B->ncols); mzd_concat(C->x, A->x, B->x); return C; } /** * \brief Stack A on top of B and write the result to C. * * That is, \verbatim [ A ], [ B ] -> [ A ] = C [ B ] \endverbatim * The inputs are not modified but a new matrix is created. 
* * \param C Matrix, may be NULL for automatic creation * \param A Matrix * \param B Matrix * * \ingroup Constructions */ static inline mzed_t *mzed_stack(mzed_t *C, const mzed_t *A, const mzed_t *B) { if(C==NULL) C = mzed_init(A->finite_field, A->nrows + B->nrows, A->ncols); mzd_stack(C->x, A->x, B->x); return C; } /** * \brief Copy a submatrix. * * Note that the upper bounds are not included. * * \param S Preallocated space for submatrix, may be NULL for automatic creation. * \param M Matrix * \param lowr start rows * \param lowc start column * \param highr stop row (this row is \em not included) * \param highc stop column (this column is \em not included) * * \ingroup Constructions */ static inline mzed_t *mzed_submatrix(mzed_t *S, const mzed_t *M, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) { if(S==NULL) S = mzed_init(M->finite_field, highr - lowr, highc - lowc); mzd_submatrix(S->x, M->x, lowr, lowc*M->w, highr, highc*M->w); return S; } /** * \brief Create a window/view into the matrix A. * * A matrix window for A is a meta structure on the matrix A. It is * setup to point into the matrix so M \em must \em not be freed while * the matrix window is used. * * This function puts the restriction on the provided parameters that * all parameters must be within range for A which is not currently * enforced. * * Use mzed_free_window() to free the window. 
* * \param A Matrix * \param lowr Starting row (inclusive) * \param lowc Starting column (inclusive) * \param highr End row (exclusive) * \param highc End column (exclusive) * * \ingroup Constructions */ static inline mzed_t *mzed_init_window(const mzed_t *A, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) { mzed_t *B = (mzed_t *)m4ri_mm_malloc(sizeof(mzed_t)); B->finite_field = A->finite_field; B->w = gf2e_degree_to_w(A->finite_field); B->nrows = highr - lowr; B->ncols = highc - lowc; B->x = mzd_init_window(A->x, lowr, B->w*lowc, highr, B->w*highc); return B; } /** * \brief Free a matrix window created with mzed_init_window(). * * \param A Matrix * * \ingroup Constructions */ static inline void mzed_free_window(mzed_t *A) { mzd_free_window(A->x); m4ri_mm_free(A); } /** * \brief \f$ C = A+B \f$. * * C is also returned. If C is NULL then a new matrix is created which * must be freed by mzed_free(). * * \param C Preallocated sum matrix, may be NULL for automatic creation. * \param A Matrix * \param B Matrix * * \ingroup Addition */ mzed_t *mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B); /** * \brief \f$ C = A+B \f$. * * \param C Preallocated sum matrix, may be NULL for automatic creation. * \param A Matrix * \param B Matrix * * \ingroup Addition */ mzed_t *_mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B); /** * \brief \f$ C = A+B \f$. * * \param C Preallocated difference matrix, may be NULL for automatic creation. * \param A Matrix * \param B Matrix * * \ingroup Addition */ #define mzed_sub mzed_add /** * \brief \f$ C = A+B \f$. * * \param C Preallocated difference matrix, may be NULL for automatic creation. * \param A Matrix * \param B Matrix * * \ingroup Addition */ #define _mzed_sub _mzed_add /** * \brief \f$ C = A \cdot B \f$. * * \param C Preallocated return matrix, may be NULL for automatic creation. * \param A Input matrix A. * \param B Input matrix B. 
*
 * \return The product matrix (presumably C, or the newly created matrix
 *         when C is NULL -- TODO confirm against the implementation).
 *
 * \ingroup Multiplication
 */

mzed_t *mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B);

/**
 * \brief \f$ C = C + A \cdot B \f$.
 *
 * \param C Preallocated product matrix, may be NULL for automatic creation.
 * \param A Input matrix A.
 * \param B Input matrix B.
 *
 * \return The updated matrix (presumably C -- TODO confirm).
 *
 * \ingroup Multiplication
 */

mzed_t *mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B);

/**
 * \brief \f$ C = A \cdot B \f$.
 *
 * \param C Preallocated product matrix.
 * \param A Input matrix A.
 * \param B Input matrix B.
 *
 * \note NOTE(review): unlike mzed_mul(), the documentation of C does not
 * mention NULL; presumably C must be preallocated here -- confirm.
 *
 * \ingroup Multiplication
 */

mzed_t *_mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B);

/**
 * \brief \f$ C = C + A \cdot B \f$.
 *
 * \param C Preallocated product matrix.
 * \param A Input matrix A.
 * \param B Input matrix B.
 *
 * \note NOTE(review): unlike mzed_addmul(), the documentation of C does not
 * mention NULL; presumably C must be preallocated here -- confirm.
 *
 * \ingroup Multiplication
 */

mzed_t *_mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B);

/**
 * \brief \f$ C = C + A \cdot B \f$ using naive cubic multiplication.
 *
 * \param C Preallocated product matrix.
 * \param A Input matrix A.
 * \param B Input matrix B.
 *
 * \note There is no reason to call this function except for checking
 * the correctness of other algorithms. It is very slow.
 *
 * \ingroup Multiplication
 */

mzed_t *mzed_addmul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);

/**
 * \brief \f$ C = A \cdot B \f$ using naive cubic multiplication.
 *
 * \param C Preallocated product matrix, may be NULL for automatic creation.
 * \param A Input matrix A.
 * \param B Input matrix B.
 *
 * \note There is no reason to call this function except for checking
 * the correctness of other algorithms. It is very slow.
 *
 * \ingroup Multiplication
 */

mzed_t *mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);

/**
 * \brief \f$ C = C + A \cdot B \f$ using naive cubic multiplication.
 *
 * \param C Preallocated product matrix.
 * \param A Input matrix A.
 * \param B Input matrix B.
 *
 * \note NOTE(review): this brief describes an accumulating update
 * (C = C + A*B) while mzed_mul_naive() is documented as C = A*B; confirm
 * which of the two this internal variant actually implements.
 *
 * \ingroup Multiplication
 */

mzed_t *_mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);

/**
 * \brief \f$ C = a \cdot B \f$.
* * \param C Preallocated product matrix or NULL. * \param a finite field element. * \param B Input matrix B. * * \ingroup Multiplication */ mzed_t *mzed_mul_scalar(mzed_t *C, const word a, const mzed_t *B); /** * Check whether C, A and B match in sizes and fields for * multiplication * * \param C Output matrix, if NULL a new matrix is created. * \param A Input matrix. * \param B Input matrix. * \param clear Write zeros to C or not. */ mzed_t *_mzed_mul_init(mzed_t *C, const mzed_t *A, const mzed_t *B, int clear); /** * \brief Fill matrix A with random elements. * * \param A Matrix * * \todo Allow the user to provide a RNG callback. * * \ingroup Assignment */ void mzed_randomize(mzed_t *A); /** * \brief Copy matrix A to B. * * \param B May be NULL for automatic creation. * \param A Source matrix. * * \ingroup Assignment */ mzed_t *mzed_copy(mzed_t *B, const mzed_t *A); /** * \brief Return diagonal matrix with value on the diagonal. * * If the matrix is not square then the largest possible square * submatrix is used. * * \param A Matrix * \param value Finite Field element * * \ingroup Assignment */ void mzed_set_ui(mzed_t *A, word value); /** * \brief Get the element at position (row,col) from the matrix A. * * \param A Source matrix. * \param row Starting row. * \param col Starting column. * * \ingroup Assignment */ static inline word mzed_read_elem(const mzed_t *A, const rci_t row, const rci_t col) { return __mzd_read_bits(A->x, row, A->w*col, A->w); } /** * \brief At the element elem to the element at position (row,col) in the matrix A. * * \param A Target matrix. * \param row Starting row. * \param col Starting column. * \param elem finite field element. * * \ingroup Assignment */ static inline void mzed_add_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) { __mzd_xor_bits(A->x, row, A->w*col, A->w, elem); } /** * \brief Write the element elem to the position (row,col) in the matrix A. * * \param A Target matrix. * \param row Starting row. 
* \param col Starting column. * \param elem finite field element. * * \ingroup Assignment */ static inline void mzed_write_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) { __mzd_clear_bits(A->x, row, A->w*col, A->w); __mzd_xor_bits(A->x, row, A->w*col, A->w, elem); } /** * \brief Return -1,0,1 if if A < B, A == B or A > B respectively. * * \param A Matrix. * \param B Matrix. * * \note This comparison is not well defined mathematically and * relatively arbitrary since elements of \GF2E don't have an * ordering. * * \ingroup Comparison */ static inline int mzed_cmp(mzed_t *A, mzed_t *B) { return mzd_cmp(A->x,B->x); } /** * \brief Zero test for matrix. * * \param A Input matrix. * * \ingroup Comparison */ static inline int mzed_is_zero(const mzed_t *A) { return mzd_is_zero(A->x); } /** * A[ar,c] = A[ar,c] + x*B[br,c] for all c >= startcol. * * \param A Matrix. * \param ar Row index in A. * \param B Matrix. * \param br Row index in B. * \param x Finite field element. * \param start_col Column index. * * \ingroup RowOperations */ void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word x, rci_t start_col); /** * A[ar,c] = A[ar,c] + B[br,c] for all c >= startcol. * * \param A Matrix. * \param ar Row index in A. * \param B Matrix. * \param br Row index in B. * \param start_col Column index. 
* * \ingroup RowOperations */ static inline void mzed_add_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, rci_t start_col) { assert(A->ncols == B->ncols && A->finite_field == B->finite_field); assert(A->x->offset == B->x->offset); assert(start_col < A->ncols); const rci_t start = A->x->offset + A->w*start_col; const wi_t startblock = start/m4ri_radix; const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix)); const word bitmask_end = __M4RI_LEFT_BITMASK((A->x->offset + A->x->ncols) % m4ri_radix); word *_a = A->x->rows[ar]; const word *_b = B->x->rows[br]; wi_t j; if (A->x->width - startblock > 1) { _a[startblock] ^= _b[startblock] & bitmask_begin; for(j=startblock+1; jx->width-1; j++) _a[j] ^= _b[j]; _a[j] ^= _b[j] & bitmask_end; } else { _a[startblock] ^= _b[startblock] & (bitmask_begin & bitmask_end); } } /** * \brief Rescale the row r in A by X starting c. * * \param A Matrix * \param r Row index. * \param start_col Column index. * \param x Multiplier * * \ingroup RowOperations */ static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const word x) { assert(start_col < A->ncols); const gf2e *ff = A->finite_field; const rci_t start = A->x->offset + A->w*start_col; const wi_t startblock = start/m4ri_radix; word *_a = A->x->rows[r]; const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix)); const word bitmask_end = __M4RI_LEFT_BITMASK((A->x->offset + A->x->ncols) % m4ri_radix); register word __a = _a[startblock]>>(start%m4ri_radix); register word __t = 0; int j; if(A->w == 2) { switch( (start/2) % 32 ) { case 0: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 0; __a >>= 2; case 1: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 2; __a >>= 2; case 2: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 4; __a >>= 2; case 3: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 6; __a >>= 2; case 4: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 8; __a >>= 2; case 5: __t ^= ff->mul(ff, 
x, __a & 0x0000000000000003ULL)<<10; __a >>= 2; case 6: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<12; __a >>= 2; case 7: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<14; __a >>= 2; case 8: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<16; __a >>= 2; case 9: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<18; __a >>= 2; case 10: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<20; __a >>= 2; case 11: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<22; __a >>= 2; case 12: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<24; __a >>= 2; case 13: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<26; __a >>= 2; case 14: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<28; __a >>= 2; case 15: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<30; __a >>= 2; case 16: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<32; __a >>= 2; case 17: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<34; __a >>= 2; case 18: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<36; __a >>= 2; case 19: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<38; __a >>= 2; case 20: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<40; __a >>= 2; case 21: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<42; __a >>= 2; case 22: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<44; __a >>= 2; case 23: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<46; __a >>= 2; case 24: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<48; __a >>= 2; case 25: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<50; __a >>= 2; case 26: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<52; __a >>= 2; case 27: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<54; __a >>= 2; case 28: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<56; __a >>= 2; case 29: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<58; __a >>= 2; case 30: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<60; __a >>= 2; case 31: __t ^= ff->mul(ff, x, __a & 
0x0000000000000003ULL)<<62; break; default: m4ri_die("impossible"); } if(A->x->width-startblock == 1) { _a[startblock] &= ~(bitmask_begin & bitmask_end); _a[startblock] ^= __t & bitmask_begin & bitmask_end; return; } else { _a[startblock] &= ~bitmask_begin; _a[startblock] ^= __t & bitmask_begin; } for(j=startblock+1; jx->width -1; j++) { __a = _a[j], __t = 0; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 0; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 2; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 4; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 6; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 8; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<10; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<12; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<14; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<16; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<18; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<20; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<22; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<24; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<26; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<28; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<30; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<32; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<34; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<36; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<38; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<40; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<42; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<44; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<46; __a >>= 2; __t ^= ff->mul(ff, x, __a & 
0x0000000000000003ULL)<<48; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<50; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<52; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<54; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<56; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<58; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<60; __a >>= 2; __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<62; _a[j] = __t; } __t = _a[j] & ~bitmask_end; switch((A->x->offset+A->x->ncols) % m4ri_radix) { case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xC000000000000000ULL)>>62)<<62; case 62: __t ^= ff->mul(ff, x, (_a[j] & 0x3000000000000000ULL)>>60)<<60; case 60: __t ^= ff->mul(ff, x, (_a[j] & 0x0C00000000000000ULL)>>58)<<58; case 58: __t ^= ff->mul(ff, x, (_a[j] & 0x0300000000000000ULL)>>56)<<56; case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00C0000000000000ULL)>>54)<<54; case 54: __t ^= ff->mul(ff, x, (_a[j] & 0x0030000000000000ULL)>>52)<<52; case 52: __t ^= ff->mul(ff, x, (_a[j] & 0x000C000000000000ULL)>>50)<<50; case 50: __t ^= ff->mul(ff, x, (_a[j] & 0x0003000000000000ULL)>>48)<<48; case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000C00000000000ULL)>>46)<<46; case 46: __t ^= ff->mul(ff, x, (_a[j] & 0x0000300000000000ULL)>>44)<<44; case 44: __t ^= ff->mul(ff, x, (_a[j] & 0x00000C0000000000ULL)>>42)<<42; case 42: __t ^= ff->mul(ff, x, (_a[j] & 0x0000030000000000ULL)>>40)<<40; case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000C000000000ULL)>>38)<<38; case 38: __t ^= ff->mul(ff, x, (_a[j] & 0x0000003000000000ULL)>>36)<<36; case 36: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000C00000000ULL)>>34)<<34; case 34: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000300000000ULL)>>32)<<32; case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000C0000000ULL)>>30)<<30; case 30: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000030000000ULL)>>28)<<28; case 28: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000C000000ULL)>>26)<<26; case 26: __t ^= 
ff->mul(ff, x, (_a[j] & 0x0000000003000000ULL)>>24)<<24; case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000C00000ULL)>>22)<<22; case 22: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000300000ULL)>>20)<<20; case 20: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000C0000ULL)>>18)<<18; case 18: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000030000ULL)>>16)<<16; case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000C000ULL)>>14)<<14; case 14: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000003000ULL)>>12)<<12; case 12: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000C00ULL)>>10)<<10; case 10: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000300ULL)>> 8)<< 8; case 8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000C0ULL)>> 6)<< 6; case 6: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000030ULL)>> 4)<< 4; case 4: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000000CULL)>> 2)<< 2; case 2: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000003ULL)>> 0)<< 0; }; _a[j] = __t; } else if(A->w == 4) { switch( (start/4)%16 ) { case 0: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 0; __a >>= 4; case 1: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 4; __a >>= 4; case 2: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 8; __a >>= 4; case 3: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<12; __a >>= 4; case 4: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<16; __a >>= 4; case 5: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<20; __a >>= 4; case 6: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<24; __a >>= 4; case 7: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<28; __a >>= 4; case 8: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<32; __a >>= 4; case 9: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<36; __a >>= 4; case 10: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<40; __a >>= 4; case 11: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<44; __a >>= 4; case 12: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<48; __a >>= 4; case 13: __t ^= ff->mul(ff, 
x, __a & 0x000000000000000FULL)<<52; __a >>= 4; case 14: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<56; __a >>= 4; case 15: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<60; break; default: m4ri_die("impossible"); } if(A->x->width-startblock == 1) { _a[startblock] &= ~(bitmask_begin & bitmask_end); _a[startblock] ^= __t & bitmask_begin & bitmask_end; return; } else { _a[startblock] &= ~bitmask_begin; _a[startblock] ^= __t & bitmask_begin; } for(j=startblock+1; jx->width -1; j++) { __a = _a[j], __t = 0; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 0; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 4; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 8; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<12; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<16; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<20; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<24; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<28; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<32; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<36; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<40; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<44; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<48; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<52; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<56; __a >>= 4; __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<60; _a[j] = __t; } __t = _a[j] & ~bitmask_end; switch( (A->x->offset + A->x->ncols) % m4ri_radix) { case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xF000000000000000ULL)>>60)<<60; case 60: __t ^= ff->mul(ff, x, (_a[j] & 0x0F00000000000000ULL)>>56)<<56; case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00F0000000000000ULL)>>52)<<52; case 52: __t ^= ff->mul(ff, x, (_a[j] & 0x000F000000000000ULL)>>48)<<48; case 48: __t ^= ff->mul(ff, 
x, (_a[j] & 0x0000F00000000000ULL)>>44)<<44; case 44: __t ^= ff->mul(ff, x, (_a[j] & 0x00000F0000000000ULL)>>40)<<40; case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000F000000000ULL)>>36)<<36; case 36: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000F00000000ULL)>>32)<<32; case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000F0000000ULL)>>28)<<28; case 28: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000F000000ULL)>>24)<<24; case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000F00000ULL)>>20)<<20; case 20: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000F0000ULL)>>16)<<16; case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000F000ULL)>>12)<<12; case 12: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000F00ULL)>> 8)<< 8; case 8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000F0ULL)>> 4)<< 4; case 4: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000000FULL)>> 0)<< 0; }; _a[j] = __t; } else if (A->w == 8) { register word __a0 = _a[startblock]>>(start%m4ri_radix); register word __a1; register word __t0 = 0; register word __t1; switch( (start/8) %8 ) { case 0: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<< 0; __a0 >>= 8; case 1: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<< 8; __a0 >>= 8; case 2: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<16; __a0 >>= 8; case 3: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<24; __a0 >>= 8; case 4: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<32; __a0 >>= 8; case 5: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<40; __a0 >>= 8; case 6: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<48; __a0 >>= 8; case 7: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<56; break; default: m4ri_die("impossible"); } if(A->x->width-startblock == 1) { _a[startblock] &= ~(bitmask_begin & bitmask_end); _a[startblock] ^= __t0 & bitmask_begin & bitmask_end; return; } else { _a[startblock] &= ~bitmask_begin; _a[startblock] ^= __t0 & bitmask_begin; } for(j=startblock+1; j+2 < A->x->width; j+=2) { __a0 = _a[j], __t0 
= 0; __a1 = _a[j+1], __t1 = 0; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 0; __a0 >>= 8; __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<< 0; __a1 >>= 8; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 8; __a0 >>= 8; __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<< 8; __a1 >>= 8; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<16; __a0 >>= 8; __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<16; __a1 >>= 8; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<24; __a0 >>= 8; __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<24; __a1 >>= 8; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<32; __a0 >>= 8; __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<32; __a1 >>= 8; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<40; __a0 >>= 8; __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<40; __a1 >>= 8; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<48; __a0 >>= 8; __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<48; __a1 >>= 8; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<56; __a0 >>= 8; __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<56; _a[j+0] = __t0; _a[j+1] = __t1; } for(; j < A->x->width-1; j++) { __a0 = _a[j], __t0 = 0; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 0; __a0 >>= 8; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 8; __a0 >>= 8; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<16; __a0 >>= 8; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<24; __a0 >>= 8; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<32; __a0 >>= 8; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<40; __a0 >>= 8; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<48; __a0 >>= 8; __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<56; _a[j] = __t0; } __t = _a[j] & ~bitmask_end; switch( (A->x->offset + A->x->ncols) % m4ri_radix ) { case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xFF00000000000000ULL)>>56)<<56; case 56: __t ^= ff->mul(ff, x, (_a[j] & 
0x00FF000000000000ULL)>>48)<<48; case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000FF0000000000ULL)>>40)<<40; case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000FF00000000ULL)>>32)<<32; case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000FF000000ULL)>>24)<<24; case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000FF0000ULL)>>16)<<16; case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000FF00ULL)>> 8)<< 8; case 8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000FFULL)>> 0)<< 0; }; _a[j] = __t; } else if (A->w == 16) { switch( (start/16) %4 ) { case 0: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16; case 1: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16; case 2: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16; case 3: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48; break; default: m4ri_die("impossible"); } if(A->x->width-startblock == 1) { _a[startblock] &= ~(bitmask_begin & bitmask_end); _a[startblock] ^= __t & bitmask_begin & bitmask_end; return; } else { _a[startblock] &= ~bitmask_begin; _a[startblock] ^= __t & bitmask_begin; } for(j=startblock+1; j+4x->width; j+=4) { __a = _a[j], __t = 0; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48; _a[j] = __t; __a = _a[j+1], __t = 0; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48; _a[j+1] = __t; __a = _a[j+2], __t = 0; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16; __t ^= ff->mul(ff, x, __a & 
0x000000000000FFFFULL)<<48; _a[j+2] = __t; __a = _a[j+3], __t = 0; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48; _a[j+3] = __t; } for( ; jx->width-1; j++) { __a = _a[j], __t = 0; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16; __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48; _a[j] = __t; } __t = _a[j] & ~bitmask_end; switch( (A->x->offset + A->x->ncols) % m4ri_radix) { case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xFFFF000000000000ULL)>>48)<<48; case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000FFFF00000000ULL)>>32)<<32; case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000FFFF0000ULL)>>16)<<16; case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000FFFFULL)>> 0)<< 0; }; _a[j] = __t; } else { for(rci_t j=start_col; jncols; j++) { mzed_write_elem(A, r, j, ff->mul(ff, x, mzed_read_elem(A, r, j))); } } } /** * \brief Swap the two rows rowa and rowb. * * \param M Matrix * \param rowa Row index. * \param rowb Row index. * * \ingroup RowOperations */ static inline void mzed_row_swap(mzed_t *M, const rci_t rowa, const rci_t rowb) { mzd_row_swap(M->x, rowa, rowb); } /** * \brief copy row j from A to row i from B. * * The offsets of A and B must match and the number of columns of A * must be less than or equal to the number of columns of B. * * \param B Target matrix. * \param i Target row index. * \param A Source matrix. * \param j Source row index. * * \ingroup RowOperations */ static inline void mzed_copy_row(mzed_t* B, rci_t i, const mzed_t* A, rci_t j) { mzd_copy_row(B->x, i, A->x, j); } /** * \brief Swap the two columns cola and colb. * * \param M Matrix. * \param cola Column index. * \param colb Column index. 
* * \ingroup RowOperations */ static inline void mzed_col_swap(mzed_t *M, const rci_t cola, const rci_t colb) { for(rci_t i=0; iw; i++) mzd_col_swap(M->x,M->w*cola+i, M->w*colb+i); } /** * \brief Swap the two columns cola and colb but only between start_row and stop_row. * * \param A Matrix. * \param cola Column index. * \param colb Column index. * \param start_row Row index. * \param stop_row Row index (exclusive). * * \ingroup RowOperations */ static inline void mzed_col_swap_in_rows(mzed_t *A, const rci_t cola, const rci_t colb, const rci_t start_row, rci_t stop_row) { for(unsigned int e=0; e < A->finite_field->degree; e++) { mzd_col_swap_in_rows(A->x, A->w*cola+e, A->w*colb+e, start_row, stop_row); }; } /** * \brief Add the rows sourcerow and destrow and stores the total in * the row destrow. * * \param M Matrix * \param sourcerow Index of source row * \param destrow Index of target row * * \note this can be done much faster with mzed_combine. * * \ingroup RowOperations */ static inline void mzed_row_add(mzed_t *M, const rci_t sourcerow, const rci_t destrow) { mzd_row_add(M->x, sourcerow, destrow); } /** * \brief Return the first row with all zero entries. * * If no such row can be found returns nrows. * * \param A Matrix * * \ingroup RowOperations */ static inline rci_t mzed_first_zero_row(mzed_t *A) { return mzd_first_zero_row(A->x); } /** * \brief Clear the given row, but only begins at the column coloffset. * * \param M Matrix * \param row Index of row * \param coloffset Column offset * * \ingroup RowOperations */ static inline void mzed_row_clear_offset(mzed_t *M, const rci_t row, const rci_t coloffset) { mzd_row_clear_offset(M->x, row, coloffset*M->w); } /** * \brief Gaussian elimination. * * Perform Gaussian elimination on the matrix A. If full=0, then it * will do triangular style elimination, and if full=1, it will do * Gauss-Jordan style, or full elimination. * * \param A Matrix * \param full Gauss-Jordan style or upper unit-triangular form only. 
* * \ingroup Echelon */ rci_t mzed_echelonize_naive(mzed_t *A, int full); /** * \brief Print a matrix to stdout. * * \param M Matrix * * \ingroup StringConversions */ void mzed_print(const mzed_t *M); #endif //M4RIE_MATRIX_H libm4rie-20130416/src/mzed_intro.inl000066400000000000000000000014721212302364300171300ustar00rootroot00000000000000#define matrix_t mzed_t #define matrix_set_ui mzed_set_ui #define matrix_write_elem mzed_write_elem #define matrix_init_window mzed_init_window #define matrix_free_window mzed_free_window #define matrix_addmul mzed_addmul #define matrix_apply_p_right mzed_apply_p_right #define matrix_trsm_lower_left mzed_trsm_lower_left #define _matrix_trsm_lower_left _mzed_trsm_lower_left #define matrix_trsm_lower_left_naive mzed_trsm_lower_left_naive #define matrix_trsm_lower_left_newton_john mzed_trsm_lower_left_newton_john #define matrix_trsm_upper_left mzed_trsm_upper_left #define _matrix_trsm_upper_left _mzed_trsm_upper_left #define matrix_trsm_upper_left_naive mzed_trsm_upper_left_naive #define matrix_trsm_upper_left_newton_john mzed_trsm_upper_left_newton_john #define matrix_ple mzed_ple #define matrix_pluq mzed_pluq libm4rie-20130416/src/mzed_outro.inl000066400000000000000000000007441212302364300171460ustar00rootroot00000000000000#undef matrix_t #undef matrix_set_ui #undef matrix_write_elem #undef matrix_init_window #undef matrix_free_window #undef matrix_addmul #undef matrix_apply_p_right #undef matrix_trsm_lower_left #undef _matrix_trsm_lower_left #undef matrix_trsm_lower_left_naive #undef matrix_trsm_lower_left_newton_john #undef matrix_trsm_upper_left #undef _matrix_trsm_upper_left #undef matrix_trsm_upper_left_naive #undef matrix_trsm_upper_left_newton_john #undef matrix_ple #undef matrix_pluq libm4rie-20130416/src/newton_john.c000066400000000000000000000447441212302364300167570ustar00rootroot00000000000000/****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * 
Copyright (C) 2010,2011 Martin Albrecht
*
*    Distributed under the terms of the GNU General Public License (GEL)
*    version 2 or higher.
*
*    This code is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*    General Public License for more details.
*
*    The full text of the GPL is available at:
*
*                  http://www.gnu.org/licenses/
******************************************************************************/

#include "config.h"

/* NOTE(review): the header names of the following four #include
 * directives are missing in this copy of the file; restore them from the
 * upstream sources before building. */
#include
#include
#include
#include

#include "newton_john.h"
#include "trsm.h"
#include "ple.h"
#include "conversion.h"

/**
 * Allocate a Newton-John table for the field ff and ncols columns.
 *
 * Three objects are allocated: the lookup map L with
 * __M4RI_TWOPOW(ff->degree) entries, the table T with
 * __M4RI_TWOPOW(ff->degree) rows and the table M with ff->degree rows.
 * Both tables have ncols columns.
 *
 * \param ff Finite field.
 * \param ncols Number of columns of T and M.
 *
 * \return The new table; release it with njt_mzed_free().
 */
njt_mzed_t *njt_mzed_init(const gf2e *ff, const rci_t ncols) {
  njt_mzed_t *T = m4ri_mm_malloc(sizeof(njt_mzed_t));

  T->L = (rci_t*)m4ri_mm_calloc(__M4RI_TWOPOW(ff->degree), sizeof(rci_t));
  T->T = mzed_init(ff, __M4RI_TWOPOW(ff->degree), ncols);
  T->M = mzed_init(ff, ff->degree, ncols);
  return T;
}

/**
 * Free a Newton-John table created by njt_mzed_init().
 *
 * Releases both matrices, the lookup map and finally the structure
 * itself.
 *
 * \param T Table; it is dereferenced unconditionally, so it must not be
 * NULL.
 */
void njt_mzed_free(njt_mzed_t *T) {
  mzed_free(T->M);
  mzed_free(T->T);
  m4ri_mm_free(T->L);
  m4ri_mm_free(T);
}

/**
 * Compute C[rc,i] = C[rc,i] + T0[r0,i] + ... + T3[r3,i] for 0 <= i < ncols
 *
 * The rows are handed to _mzd_combine4() as raw word pointers together
 * with C->x->width as the number of words.
 *
 * \param C Matrix
 * \param rc Row index
 * \param T0 Matrix
 * \param r0 Row index
 * \param T1 Matrix
 * \param r1 Row index
 * \param T2 Matrix
 * \param r2 Row index
 * \param T3 Matrix
 * \param r3 Row index
 *
 * \wordoffset
 */

static inline void mzed_combine4(mzed_t *C, rci_t rc,
                                 mzed_t *T0, rci_t r0,
                                 mzed_t *T1, rci_t r1,
                                 mzed_t *T2, rci_t r2,
                                 mzed_t *T3, rci_t r3) {
  _mzd_combine4(C->x->rows[rc], T0->x->rows[r0], T1->x->rows[r1], T2->x->rows[r2], T3->x->rows[r3], C->x->width);
}

/**
 * Compute C[rc,i] = C[rc,i] + T0[r0,i] + ...
+ T7[r7,i] for 0 <= i < ncols * * \param C Matrix * \apram rc Row index * \param T0 Matrix * \param r0 Row index * \param T1 Matrix * \param r1 Row index * \param T2 Matrix * \param r2 Row index * \param T3 Matrix * \param r3 Row index * \param T4 Matrix * \param r4 Row index * \param T5 Matrix * \param r5 Row index * \param T6 Matrix * \param r6 Row index * \param T7 Matrix * \param r7 Row index * * \wordoffset */ static inline void mzed_combine8(mzed_t *C, rci_t rc, mzed_t *T0, rci_t r0, mzed_t *T1, rci_t r1, mzed_t *T2, rci_t r2, mzed_t *T3, rci_t r3, mzed_t *T4, rci_t r4, mzed_t *T5, rci_t r5, mzed_t *T6, rci_t r6, mzed_t *T7, rci_t r7) { _mzd_combine8(C->x->rows[rc], T0->x->rows[r0], T1->x->rows[r1], T2->x->rows[r2], T3->x->rows[r3], T4->x->rows[r4], T5->x->rows[r5], T6->x->rows[r6], T7->x->rows[r7], C->x->width); } /** * \brief Perform Gaussian reduction to reduced row echelon form on a * submatrix. * * The submatrix has dimension at most k starting at r x c of A. Checks * for pivot rows up to row endrow (exclusive). Terminates as soon as * finding a pivot column fails. * * \param A Matrix. * \param r First row. * \param c First column. * \param k Maximal dimension of identity matrix to produce. * \param end_row Maximal row index (exclusive) for rows to consider * for inclusion. 
*/ rci_t _mzed_gauss_submatrix_full(mzed_t *A, const rci_t r, const rci_t c, const rci_t end_row, int k) { rci_t i,j,l; rci_t start_row = r; int found; word tmp; const gf2e *ff = A->finite_field; for (j=c; jx, i, start_row); /* clear above */ for (l=r; l A->finite_field->degree); if (T == NULL) T = njt_mzed_init(A->finite_field, A->ncols); mzd_set_ui(T->M->x,0); #if 0 for(rci_t i=0; i< T->T->nrows; i+=2) { T->L[i] = i; mzed_add_multiple_of_row(T->T, i, A, r, A->finite_field->mul[i], c); T->L[i+1] = i+1; mzed_copy_row(T->T, i+1, T->T, i); mzed_add_row(T->T, i+1, A, r, c); } #else const int degree = A->finite_field->degree; const wi_t homeblock = (A->w*c + A->x->offset) / m4ri_radix; const wi_t wide = T->M->x->width - homeblock; const word bitmask_end = __M4RI_LEFT_BITMASK((T->M->x->offset + T->M->x->ncols) % m4ri_radix); wi_t j; for(int i=0; iM, i, A, r, 1ULL<T->nrows; ++i) { word *ti = T->T->x->rows[i] + homeblock; word *ti1 = T->T->x->rows[i-1] + homeblock; const rci_t rowneeded = m4ri_codebook[degree]->inc[i - 1]; const int id = m4ri_codebook[degree]->ord[i]; T->L[id] = i; word *m = T->M->x->rows[rowneeded] + homeblock; /* there might still be stuff left over from the previous table creation, here we assume that this is at most 8 * m4ri_radix bits away. 
*/ switch (homeblock) { case 0: break; default: case 8: *(ti-7) = 0; case 7: *(ti-6) = 0; case 6: *(ti-5) = 0; case 5: *(ti-4) = 0; case 4: *(ti-3) = 0; case 3: *(ti-2) = 0; case 2: *(ti-2) = 0; case 1: *(ti-1) = 0; } for(j = 0; j + 8 <= wide - 1; j += 8) { *ti++ = *m++ ^ *ti1++; *ti++ = *m++ ^ *ti1++; *ti++ = *m++ ^ *ti1++; *ti++ = *m++ ^ *ti1++; *ti++ = *m++ ^ *ti1++; *ti++ = *m++ ^ *ti1++; *ti++ = *m++ ^ *ti1++; *ti++ = *m++ ^ *ti1++; } switch(wide - j) { case 8: *ti++ = *m++ ^ *ti1++; case 7: *ti++ = *m++ ^ *ti1++; case 6: *ti++ = *m++ ^ *ti1++; case 5: *ti++ = *m++ ^ *ti1++; case 4: *ti++ = *m++ ^ *ti1++; case 3: *ti++ = *m++ ^ *ti1++; case 2: *ti++ = *m++ ^ *ti1++; case 1: *ti++ = (*m++ ^ *ti1++) & bitmask_end; } } #endif return T; } rci_t mzed_echelonize_newton_john(mzed_t *A, int full) { const gf2e* ff = A->finite_field; rci_t r,c; rci_t k = ff->degree; /* cf. mzd_echelonize_m4ri */ rci_t kk = (rci_t)m4ri_opt_k(A->x->nrows, A->x->ncols, 0); if (kk>=7) kk = 7; if ( (6*(1<ncols / 8.0) > __M4RI_CPU_L2_CACHE / 2.0 ) kk -= 1; kk = (6*kk)/k; /* enforcing bounds */ if (kk == 0) kk = 1; else if (kk > 6) kk = 6; rci_t kbar = 0; njt_mzed_t *T0 = njt_mzed_init(ff, A->ncols); njt_mzed_t *T1 = njt_mzed_init(ff, A->ncols); njt_mzed_t *T2 = njt_mzed_init(ff, A->ncols); njt_mzed_t *T3 = njt_mzed_init(ff, A->ncols); njt_mzed_t *T4 = njt_mzed_init(ff, A->ncols); njt_mzed_t *T5 = njt_mzed_init(ff, A->ncols); r = 0; c = 0; while(c < A->ncols) { if(c+kk > A->ncols) kk = A->ncols - c; /** * \todo we don't really compute the upper triangular form yet, * we need to implement _mzed_gauss_submatrix() and a better * table creation for that. 
*/ kbar = _mzed_gauss_submatrix_full(A, r, c, A->nrows, kk); if (kbar == 6) { mzed_make_table(T0, A, r, c); mzed_make_table(T1, A, r+1, c+1); mzed_make_table(T2, A, r+2, c+2); mzed_make_table(T3, A, r+3, c+3); mzed_make_table(T4, A, r+4, c+4); mzed_make_table(T5, A, r+5, c+5); if(kbar == kk) mzed_process_rows6( A, r+6, A->nrows, c, T0, T1, T2, T3, T4, T5); if(full) mzed_process_rows6( A, 0, r, c, T0, T1, T2, T3, T4, T5); } else if(kbar == 5) { mzed_make_table(T0, A, r, c); mzed_make_table(T1, A, r+1, c+1); mzed_make_table(T2, A, r+2, c+2); mzed_make_table(T3, A, r+3, c+3); mzed_make_table(T4, A, r+4, c+4); if(kbar == kk) mzed_process_rows5( A, r+5, A->nrows, c, T0, T1, T2, T3, T4); if(full) mzed_process_rows5( A, 0, r, c, T0, T1, T2, T3, T4); } else if(kbar == 4) { mzed_make_table(T0, A, r, c); mzed_make_table(T1, A, r+1, c+1); mzed_make_table(T2, A, r+2, c+2); mzed_make_table(T3, A, r+3, c+3); if(kbar == kk) mzed_process_rows4( A, r+4, A->nrows, c, T0, T1, T2, T3); if(full) mzed_process_rows4( A, 0, r, c, T0, T1, T2, T3); } else if(kbar == 3) { mzed_make_table(T0, A, r, c ); mzed_make_table(T1, A, r+1, c+1); mzed_make_table(T2, A, r+2, c+2); if(kbar == kk) mzed_process_rows3( A, r+3, A->nrows, c, T0, T1, T2); if(full) mzed_process_rows3( A, 0, r, c, T0, T1, T2); } else if(kbar == 2) { mzed_make_table(T0, A, r, c ); mzed_make_table(T1, A, r+1, c+1); if(kbar == kk) mzed_process_rows2( A, r+2, A->nrows, c, T0, T1); if(full) mzed_process_rows2( A, 0, r, c, T0, T1); } else if (kbar == 1) { mzed_make_table(T0, A, r, c); if(kbar == kk) mzed_process_rows( A, r+1, A->nrows, c, T0); if(full) mzed_process_rows( A, 0, r, c, T0); } else { c++; } r += kbar; c += kbar; } njt_mzed_free(T0); njt_mzed_free(T1); njt_mzed_free(T2); njt_mzed_free(T3); njt_mzed_free(T4); njt_mzed_free(T5); return r; } rci_t mzed_ple_newton_john(mzed_t *A, mzp_t *P, mzp_t *Q) { rci_t col_pos = 0; rci_t row_pos = 0; word tmp = 0; const gf2e *ff = A->finite_field; rci_t i,j; int found = 0; njt_mzed_t *T0 
= njt_mzed_init(A->finite_field, A->ncols); while (row_pos < A->nrows && col_pos < A->ncols) { found = 0; for(j=col_pos; jncols; j++) { for(i=row_pos; inrows; i++) { if( (tmp = mzed_read_elem(A, i,j)) != 0) { found = 1; break; } } if (found) break; } if (found) { P->values[row_pos] = i; Q->values[row_pos] = j; mzed_row_swap(A, row_pos, i); if (j+1 < A->ncols) { mzed_rescale_row(A, row_pos, j+1, gf2e_inv(ff, tmp)); mzed_make_table(T0, A, row_pos, j+1); mzed_process_rows(A, row_pos+1, A->nrows, j, T0); } row_pos++; col_pos = j + 1; } else { break; } } for (rci_t i = row_pos; i < A->nrows; ++i) P->values[i] = i; for (rci_t i = row_pos; i < A->ncols; ++i) Q->values[i] = i; for (rci_t i=0; i < row_pos; i++) { mzed_col_swap_in_rows(A, i, Q->values[i], i, A->nrows); } njt_mzed_free(T0); return row_pos; } mzed_t *_mzed_mul_newton_john0(mzed_t *C, const mzed_t *A, const mzed_t *B) { njt_mzed_t *T0 = njt_mzed_init(B->finite_field, B->ncols); for(rci_t i=0; i < A->ncols; i++) { mzed_make_table(T0, B, i, 0); for(rci_t j=0; jnrows; j++) mzd_combine(C->x, j, 0, C->x, j, 0, T0->T->x, T0->L[mzed_read_elem(A, j, i)], 0); } njt_mzed_free(T0); return C; } mzed_t *_mzed_mul_newton_john(mzed_t *C, const mzed_t *A, const mzed_t *B) { if (A->finite_field->degree > A->nrows) return _mzed_mul_naive(C, A, B); njt_mzed_t *T0 = njt_mzed_init(B->finite_field, B->ncols); njt_mzed_t *T1 = njt_mzed_init(B->finite_field, B->ncols); njt_mzed_t *T2 = njt_mzed_init(B->finite_field, B->ncols); njt_mzed_t *T3 = njt_mzed_init(B->finite_field, B->ncols); njt_mzed_t *T4 = njt_mzed_init(B->finite_field, B->ncols); njt_mzed_t *T5 = njt_mzed_init(B->finite_field, B->ncols); njt_mzed_t *T6 = njt_mzed_init(B->finite_field, B->ncols); njt_mzed_t *T7 = njt_mzed_init(B->finite_field, B->ncols); const rci_t kk = 8; const rci_t end = A->ncols/kk; rci_t blocksize = 1ULL<<30; if (A->nrows >= A->w*__M4RI_MUL_BLOCKSIZE) blocksize = __M4RI_MUL_BLOCKSIZE/A->w; rci_t giantstep, babystep; for (giantstep=0; giantstep + 
blocksize <= A->nrows; giantstep += blocksize) { for(rci_t i=0; i < end; i++) { mzed_make_table(T0, B, kk*i , 0); mzed_make_table(T1, B, kk*i+1, 0); mzed_make_table(T2, B, kk*i+2, 0); mzed_make_table(T3, B, kk*i+3, 0); mzed_make_table(T4, B, kk*i+4, 0); mzed_make_table(T5, B, kk*i+5, 0); mzed_make_table(T6, B, kk*i+6, 0); mzed_make_table(T7, B, kk*i+7, 0); for(babystep = 0; babystep < blocksize; babystep++) { const rci_t j = giantstep + babystep; const rci_t x0 = T0->L[mzed_read_elem(A, j, kk* i)]; const rci_t x1 = T1->L[mzed_read_elem(A, j, kk*i+1)]; const rci_t x2 = T2->L[mzed_read_elem(A, j, kk*i+2)]; const rci_t x3 = T3->L[mzed_read_elem(A, j, kk*i+3)]; const rci_t x4 = T4->L[mzed_read_elem(A, j, kk*i+4)]; const rci_t x5 = T5->L[mzed_read_elem(A, j, kk*i+5)]; const rci_t x6 = T6->L[mzed_read_elem(A, j, kk*i+6)]; const rci_t x7 = T7->L[mzed_read_elem(A, j, kk*i+7)]; mzed_combine8(C, j, T0->T, x0, T1->T, x1, T2->T, x2, T3->T, x3, T4->T, x4, T5->T, x5, T6->T, x6, T7->T, x7); } } } /* last giant step */ for(rci_t i=0; i < end; i++) { mzed_make_table(T0, B, kk*i , 0); mzed_make_table(T1, B, kk*i+1, 0); mzed_make_table(T2, B, kk*i+2, 0); mzed_make_table(T3, B, kk*i+3, 0); mzed_make_table(T4, B, kk*i+4, 0); mzed_make_table(T5, B, kk*i+5, 0); mzed_make_table(T6, B, kk*i+6, 0); mzed_make_table(T7, B, kk*i+7, 0); for(babystep = 0; babystep < A->nrows - giantstep; babystep++) { const rci_t j = giantstep + babystep; const rci_t x0 = T0->L[mzed_read_elem(A, j, kk* i)]; const rci_t x1 = T1->L[mzed_read_elem(A, j, kk*i+1)]; const rci_t x2 = T2->L[mzed_read_elem(A, j, kk*i+2)]; const rci_t x3 = T3->L[mzed_read_elem(A, j, kk*i+3)]; const rci_t x4 = T4->L[mzed_read_elem(A, j, kk*i+4)]; const rci_t x5 = T5->L[mzed_read_elem(A, j, kk*i+5)]; const rci_t x6 = T6->L[mzed_read_elem(A, j, kk*i+6)]; const rci_t x7 = T7->L[mzed_read_elem(A, j, kk*i+7)]; mzed_combine8(C, j, T0->T, x0, T1->T, x1, T2->T, x2, T3->T, x3, T4->T, x4, T5->T, x5, T6->T, x6, T7->T, x7); } } if (A->ncols%kk) { 
for(rci_t i=kk*end; i < A->ncols; i++) { mzed_make_table(T0, B, i, 0); for(rci_t j=0; jnrows; j++) mzd_combine(C->x, j, 0, C->x, j, 0, T0->T->x, T0->L[mzed_read_elem(A, j, i)], 0); } } njt_mzed_free(T0); njt_mzed_free(T1); njt_mzed_free(T2); njt_mzed_free(T3); njt_mzed_free(T4); njt_mzed_free(T5); njt_mzed_free(T6); njt_mzed_free(T7); return C; } mzed_t *mzed_mul_newton_john(mzed_t *C, const mzed_t *A, const mzed_t *B) { C = _mzed_mul_init(C,A,B, TRUE); return _mzed_mul_newton_john(C, A, B); } mzed_t *mzed_addmul_newton_john(mzed_t *C, const mzed_t *A, const mzed_t *B) { C = _mzed_mul_init(C,A,B, FALSE); return _mzed_mul_newton_john(C, A, B); } mzed_t *mzed_invert_newton_john(mzed_t *B, const mzed_t *A) { assert(A->nrows == A->ncols); mzed_t *I = mzed_init(A->finite_field, A->nrows, A->ncols); mzed_set_ui(I, 1); mzed_t *T = mzed_concat(NULL, A, I); mzed_free(I); rci_t r = mzed_echelonize_newton_john(T, 1); if (r != A->nrows) m4ri_die("mzed_invert_newton_john: input matrix does not have full rank."); B = mzed_submatrix(B, T, 0, A->ncols, A->nrows, T->ncols); mzed_free(T); return B; } void mzed_trsm_lower_left_newton_john(const mzed_t *L, mzed_t *B) { assert(L->finite_field == B->finite_field); assert(L->nrows == L->ncols); assert(B->nrows == L->ncols); const gf2e *ff = L->finite_field; if (__M4RI_TWOPOW(ff->degree) >= L->nrows) { mzed_trsm_lower_left_naive(L, B); return; } njt_mzed_t *T0 = njt_mzed_init(B->finite_field, B->ncols); for(rci_t i=0; inrows; i++) { mzed_rescale_row(B, i, 0, gf2e_inv(ff, mzed_read_elem(L, i, i))); mzed_make_table(T0, B, i, 0); for(rci_t j=i+1; jnrows; j++) mzd_combine(B->x, j, 0, B->x, j, 0, T0->T->x, T0->L[mzed_read_elem(L, j, i)], 0); } njt_mzed_free(T0); } void mzed_trsm_upper_left_newton_john(const mzed_t *U, mzed_t *B) { assert(U->finite_field == B->finite_field); assert(U->nrows == U->ncols); assert(B->nrows == U->ncols); const gf2e *ff = U->finite_field; if ( (__M4RI_TWOPOW(ff->degree) >= U->nrows) || B->x->offset ) { 
mzed_trsm_upper_left_naive(U, B); return; } njt_mzed_t *T0 = njt_mzed_init(B->finite_field, B->ncols); for(int i=B->nrows-1; i>=0; i--) { mzed_rescale_row(B, i, 0, gf2e_inv(ff, mzed_read_elem(U, i, i))); mzed_make_table(T0, B, i, 0); for(rci_t j=0; jx, j, 0, B->x, j, 0, T0->T->x, T0->L[mzed_read_elem(U, j, i)], 0); } njt_mzed_free(T0); } void mzd_slice_trsm_lower_left_newton_john(const mzd_slice_t *L, mzd_slice_t *B) { assert(B->x[0]->offset == 0); assert(L->finite_field == B->finite_field); assert(L->nrows == L->ncols); assert(B->nrows == L->ncols); const gf2e *ff = L->finite_field; if (__M4RI_TWOPOW(ff->degree) >= L->nrows) { mzd_slice_trsm_lower_left_naive(L, B); return; } mzed_t *Be = mzed_cling(NULL, B); njt_mzed_t *T0 = njt_mzed_init(B->finite_field, B->ncols); for(rci_t i=0; inrows; i++) { mzed_rescale_row(Be, i, 0, gf2e_inv(ff, mzd_slice_read_elem(L, i, i))); mzed_make_table(T0, Be, i, 0); for(rci_t j=i+1; jnrows; j++) mzd_combine(Be->x, j, 0, Be->x, j, 0, T0->T->x, T0->L[mzd_slice_read_elem(L, j, i)], 0); } mzed_slice(B, Be); mzed_free(Be); njt_mzed_free(T0); } void mzd_slice_trsm_upper_left_newton_john(const mzd_slice_t *U, mzd_slice_t *B) { assert(B->x[0]->offset == 0); assert(U->finite_field == B->finite_field); assert(U->nrows == U->ncols); assert(B->nrows == U->ncols); const gf2e *ff = U->finite_field; if ( (__M4RI_TWOPOW(ff->degree) >= U->nrows)) { mzd_slice_trsm_upper_left_naive(U, B); return; } mzed_t *Be = mzed_cling(NULL, B); njt_mzed_t *T0 = njt_mzed_init(Be->finite_field, Be->ncols); for(int i=B->nrows-1; i>=0; i--) { mzed_rescale_row(Be, i, 0, gf2e_inv(ff, mzd_slice_read_elem(U, i, i))); mzed_make_table(T0, Be, i, 0); for(rci_t j=0; jx, j, 0, Be->x, j, 0, T0->T->x, T0->L[mzd_slice_read_elem(U, j, i)], 0); } mzed_slice(B, Be); mzed_free(Be); njt_mzed_free(T0); } libm4rie-20130416/src/newton_john.h000066400000000000000000000235551212302364300167610ustar00rootroot00000000000000/** * \file newton_john.h * * \brief Newton-John table based 
algorithms
 *
 * \note These tables were formerly known as Travolta tables.
 *
 * \author Martin Albrecht
 */

#ifndef M4RIE_NEWTON_JOHN_H
#define M4RIE_NEWTON_JOHN_H

/******************************************************************************
*
*            M4RIE: Linear Algebra over GF(2^e)
*
*    Copyright (C) 2010,2011 Martin Albrecht
*
*    Distributed under the terms of the GNU General Public License (GEL)
*    version 2 or higher.
*
*    This code is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*    General Public License for more details.
*
*    The full text of the GPL is available at:
*
*                  http://www.gnu.org/licenses/
******************************************************************************/

/* NOTE(review): the header names of the following three #include
 * directives are missing in this copy of the file; restore them from the
 * upstream sources before building. */
#include
#include
#include

/**
 * \brief Newton-John table
 */

typedef struct {
  rci_t *L; /**< A map such that L[a] is the index of the row of T whose first entry is a. */
  mzed_t *M; /**< Table with gf2e::degree rows holding multiples of the input s.t. \f$a^i\f$ is the first entry of row \f$i\f$. */
  mzed_t *T; /**< Actual table of length \f$2^e\f$ of all linear combinations of the rows of M. */
} njt_mzed_t;

/**
 * \brief Allocate Newton-John table with \f$2^e\f$ rows and ncols
 * columns, where \f$e\f$ is gf2e::degree.
 *
 * \param ff Finite field.
 * \param ncols Integer > 0.
 *
 * \return Newly allocated table, to be released with njt_mzed_free().
 */

njt_mzed_t *njt_mzed_init(const gf2e *ff, const rci_t ncols);

/**
 * \brief Free Newton-John table
 *
 * \param t Table
 */

void njt_mzed_free(njt_mzed_t *t);

/**
 * \brief Construct Newton-John table T for row r of A, and element A[r,c].
 *
 * \param T Preallocated Newton-John table or NULL.
 * \param A Matrix.
 * \param r Row index.
 * \param c Column index.
 *
 * \return The table T; a new table is allocated if NULL was passed.
 */

njt_mzed_t * mzed_make_table(njt_mzed_t *T, const mzed_t *A, const rci_t r, const rci_t c);

/**
 * \brief \f$C = A \cdot B\f$ using Newton-John tables.
 *
 * \param C Preallocated return matrix, may be NULL for automatic creation.
 * \param A Input matrix A.
 * \param B Input matrix B.
* * \sa mzed_mul _mzed_mul_newton_john0() * * \ingroup Multiplication */ mzed_t *mzed_mul_newton_john(mzed_t *C, const mzed_t *A, const mzed_t *B); /** * \brief \f$C = C + A \cdot B\f$ using Newton-John tables. * * \param C Preallocated product matrix, may be NULL for automatic creation. * \param A Input matrix A. * \param B Input matrix B. * * \sa _mzed_mul_newton_john() mzed_mul() * * \ingroup Multiplication */ mzed_t *mzed_addmul_newton_john(mzed_t *C, const mzed_t *A, const mzed_t *B); /** * \brief \f$C = C + A \cdot B\f$ using Newton-John tables. * * This is a simple implementation for clarity of presentation. Do not * call, it is slow. * * \param C Preallocated product matrix. * \param A Input matrix A. * \param B Input matrix B. * * \sa mzed_mul_newton_john() mzed_mul() * * \ingroup Multiplication */ mzed_t *_mzed_mul_newton_john0(mzed_t *C, const mzed_t *A, const mzed_t *B); /** * \brief \f$C = C + A \cdot B\f$ using Newton-John tables. * * This is an optimised implementation. * * \param C Preallocated product matrix. * \param A Input matrix A. * \param B Input matrix B. * * \sa mzed_mul() * * \ingroup Multiplication */ mzed_t *_mzed_mul_newton_john(mzed_t *C, const mzed_t *A, const mzed_t *B); /** * \brief Reduce matrix A to row echelon form using Gauss-Newton-John * elimination. * * \param A Matrix to be reduced. * \param full If set to true, the reduced row echelon form will be * computed. * * \ingroup Echelon */ rci_t mzed_echelonize_newton_john(mzed_t *A, int full); /** * \brief Invert the matrix A using Gauss-Newton-John elimination. * * \param B Preallocated space for inversion matrix, may be NULL for * automatic creation. * \param A Matrix to be inverted. */ mzed_t *mzed_invert_newton_john(mzed_t *B, const mzed_t *A); /** * \brief \f$B = L^{-1} \cdot B\f$ using Newton-John tables. * * \param L Lower-triangular matrix (other entries are ignored). * \param B Matrix. 
*
 * \ingroup Triangular
 */

void mzed_trsm_lower_left_newton_john(const mzed_t *L, mzed_t *B);

/**
 * \brief \f$B = L^{-1} \cdot B\f$ using Newton-John tables.
 *
 * \param L Lower-triangular matrix (other entries are ignored).
 * \param B Matrix.
 *
 * \ingroup Triangular
 */

void mzd_slice_trsm_lower_left_newton_john(const mzd_slice_t *L, mzd_slice_t *B);

/**
 * \brief \f$B = U^{-1} \cdot B\f$ using Newton-John tables.
 *
 * \param U Upper-triangular matrix (other entries are ignored).
 * \param B Matrix.
 *
 * \ingroup Triangular
 */

void mzed_trsm_upper_left_newton_john(const mzed_t *U, mzed_t *B);

/**
 * \brief \f$B = U^{-1} \cdot B\f$ using Newton-John tables.
 *
 * \param U Upper-triangular matrix (other entries are ignored).
 * \param B Matrix.
 *
 * \ingroup Triangular
 */

void mzd_slice_trsm_upper_left_newton_john(const mzd_slice_t *U, mzd_slice_t *B);

/**
 * \brief PLE decomposition: \f$L \cdot E = P\cdot A\f$ using Newton-John tables.
 *
 * \param A Matrix, overwritten in place.
 * \param P Permutation, filled with the row swaps performed.
 * \param Q Permutation, filled with the pivot columns.
 *
 * \return Number of pivots found.
 *
 * \ingroup PLE
 */

rci_t mzed_ple_newton_john(mzed_t *A, mzp_t *P, mzp_t *Q);

/**
 * \brief The function looks up the entry at position i,startcol in
 * each row and adds the appropriate row from T to the row i.
 *
 * This process is iterated for i from startrow to endrow
 * (exclusive).
 *
 * \note The work is delegated to mzd_process_rows() on the underlying
 * matrix; this wrapper passes the bit offset startcol*M->w and a
 * width of M->w bits, i.e. a single element.
 *
 * \param M Matrix to operate on
 * \param startrow top row which is operated on
 * \param endrow bottom row which is operated on
 * \param startcol Starting column for addition
 * \param T Newton-John table
 *
 * \ingroup RowOperations
 */

static inline void mzed_process_rows(mzed_t *M, const rci_t startrow, const rci_t endrow, rci_t startcol, const njt_mzed_t *T) {
  mzd_process_rows(M->x, startrow, endrow, startcol*M->w, M->w, T->T->x, T->L);
}

/**
 * \brief Same as mzed_process_rows but works with two Newton-John tables
 * in parallel.
* * \param M Matrix to operate on * \param startrow top row which is operated on * \param endrow bottom row which is operated on * \param startcol Starting column for addition * \param T0 Newton-John table * \param T1 Newton-John table * * \ingroup RowOperations */ static inline void mzed_process_rows2(mzed_t *M, const rci_t startrow, const rci_t endrow, const rci_t startcol, const njt_mzed_t *T0, const njt_mzed_t *T1) { mzd_process_rows2(M->x, startrow, endrow, startcol*M->w, 2*M->w, T0->T->x, T0->L, T1->T->x, T1->L); } /** * \brief Same as mzed_process_rows but works with three Newton-John * tables in parallel. * * \param M Matrix to operate on * \param startrow top row which is operated on * \param endrow bottom row which is operated on * \param startcol Starting column for addition * \param T0 Newton-John table * \param T1 Newton-John table * \param T2 Newton-John table * * \ingroup RowOperations */ static inline void mzed_process_rows3(mzed_t *M, const rci_t startrow, const rci_t endrow, const rci_t startcol, const njt_mzed_t *T0, const njt_mzed_t *T1, const njt_mzed_t *T2) { mzd_process_rows3(M->x, startrow, endrow, startcol*M->w, 3*M->w, T0->T->x, T0->L, T1->T->x, T1->L, T2->T->x, T2->L); } /** * \brief Same as mzed_process_rows but works with four Newton-John * tables in parallel. 
* * \param M Matrix to operate on * \param startrow top row which is operated on * \param endrow bottom row which is operated on * \param startcol Starting column for addition * \param T0 Newton-John table * \param T1 Newton-John table * \param T2 Newton-John table * \param T3 Newton-John table * * \ingroup RowOperations */ static inline void mzed_process_rows4(mzed_t *M, const rci_t startrow, const rci_t endrow, const rci_t startcol, const njt_mzed_t *T0, const njt_mzed_t *T1, const njt_mzed_t *T2, const njt_mzed_t *T3) { mzd_process_rows4(M->x, startrow, endrow, startcol*M->w, 4*M->w, T0->T->x, T0->L, T1->T->x, T1->L, T2->T->x, T2->L, T3->T->x, T3->L); } /** * \brief Same as mzed_process_rows but works with five Newton-John * tables in parallel. * * \param M Matrix to operate on * \param startrow top row which is operated on * \param endrow bottom row which is operated on * \param startcol Starting column for addition * \param T0 Newton-John table * \param T1 Newton-John table * \param T2 Newton-John table * \param T3 Newton-John table * \param T4 Newton-John table * * \ingroup RowOperations */ static inline void mzed_process_rows5(mzed_t *M, const rci_t startrow, const rci_t endrow, const rci_t startcol, const njt_mzed_t *T0, const njt_mzed_t *T1, const njt_mzed_t *T2, const njt_mzed_t *T3, const njt_mzed_t *T4) { mzd_process_rows5(M->x, startrow, endrow, startcol*M->w, 5*M->w, T0->T->x, T0->L, T1->T->x, T1->L, T2->T->x, T2->L, T3->T->x, T3->L, T4->T->x, T4->L); } /** * \brief Same as mzed_process_rows but works with six Newton-John tables * in parallel. 
* * \param M Matrix to operate on * \param startrow top row which is operated on * \param endrow bottom row which is operated on * \param startcol Starting column for addition * \param T0 Newton-John table * \param T1 Newton-John table * \param T2 Newton-John table * \param T3 Newton-John table * \param T4 Newton-John table * \param T5 Newton-John table * * \ingroup RowOperations */ static inline void mzed_process_rows6(mzed_t *M, const rci_t startrow, const rci_t endrow, const rci_t startcol, const njt_mzed_t *T0, const njt_mzed_t *T1, const njt_mzed_t *T2, const njt_mzed_t *T3, const njt_mzed_t *T4, const njt_mzed_t *T5) { mzd_process_rows6(M->x, startrow, endrow, startcol*M->w, 6*M->w, T0->T->x, T0->L, T1->T->x, T1->L, T2->T->x, T2->L, T3->T->x, T3->L, T4->T->x, T4->L, T5->T->x, T5->L); } #endif //M4RIE_NEWTON_JOHN_H libm4rie-20130416/src/permutation.h000066400000000000000000000102401212302364300167630ustar00rootroot00000000000000/** * \file permutation.h * \brief Permutation matrices. * * \author Martin Albrecht */ #ifndef M4RIE_PERMUTATION_H #define M4RIE_PERMUTATION_H /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2011 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include #include #include /** * Apply the permutation P to A from the left. * * This is equivalent to row swaps walking from 0 to length-1. * * \param A Matrix. * \param P Permutation. 
*/ static inline void mzed_apply_p_left(mzed_t *A, mzp_t const *P) { mzd_apply_p_left(A->x, P); } /** * Apply the permutation P to A from the left but transpose P before. * * This is equivalent to row swaps walking from length-1 to 0. * * \param A Matrix. * \param P Permutation. */ static inline void mzed_apply_p_left_trans(mzed_t *A, mzp_t const *P) { mzd_apply_p_left_trans(A->x, P); } /** * Apply the permutation P to A from the right. * * This is equivalent to column swaps walking from length-1 to 0. * * \param A Matrix. * \param P Permutation. */ static inline void mzed_apply_p_right(mzed_t *A, mzp_t const *P) { if(A->nrows == 0) return; rci_t const length = MIN(P->length, A->ncols); for (rci_t i = length-1; i >= 0; --i) { mzed_col_swap(A, i, P->values[i]); } } /** * Apply the permutation P to A from the right but transpose P before. * * This is equivalent to column swaps walking from 0 to length-1. * * \param A Matrix. * \param P Permutation. */ static inline void mzed_apply_p_right_trans(mzed_t *A, mzp_t const *P) { if(A->nrows == 0) return; rci_t const length = MIN(P->length, A->ncols); for (rci_t i = 0; i < length; ++i) { mzed_col_swap(A, i, P->values[i]); } } /** * Apply the permutation P to A from the left. * * This is equivalent to row swaps walking from 0 to length-1. * * \param A Matrix. * \param P Permutation. */ static inline void mzd_slice_apply_p_left(mzd_slice_t *A, mzp_t const *P) { for(int i=0; idepth; i++) { mzd_apply_p_left(A->x[i], P); } } /** * Apply the permutation P to A from the left but transpose P before. * * This is equivalent to row swaps walking from length-1 to 0. * * \param A Matrix. * \param P Permutation. */ static inline void mzd_slice_apply_p_left_trans(mzd_slice_t *A, mzp_t const *P) { for(int i=0; idepth; i++) { mzd_apply_p_left_trans(A->x[i], P); } } /** * Apply the permutation P to A from the right. * * This is equivalent to column swaps walking from length-1 to 0. * * \param A Matrix. * \param P Permutation. 
*/ static inline void mzd_slice_apply_p_right(mzd_slice_t *A, mzp_t const *P) { for(int i=0; idepth; i++) { mzd_apply_p_right(A->x[i], P); } } /** * Apply the permutation P to A from the right but transpose P before. * * This is equivalent to column swaps walking from 0 to length-1. * * \param A Matrix. * \param P Permutation. */ static inline void mzd_slice_apply_p_right_trans(mzd_slice_t *A, mzp_t const *P) { for(int i=0; idepth; i++) { mzd_apply_p_right_trans(A->x[i], P); } } /** * Apply the permutation P to A from the right, but only on the upper * the matrix A above the main diagonal. * * This is equivalent to column swaps walking from 0 to length-1 and * is used to compress PLE to PLUQ. * * \param A Matrix. * \param P Permutation. */ static inline void mzd_slice_apply_p_right_trans_tri(mzd_slice_t *A, mzp_t const *P) { for(int i=0; idepth; i++) { mzd_apply_p_right_trans_tri(A->x[i], P); } } #endif // M4RIE_PERMUTATION_H libm4rie-20130416/src/ple.c000066400000000000000000000126311212302364300151750ustar00rootroot00000000000000/****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2011 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include "permutation.h" #include "trsm.h" #include "ple.h" #include "newton_john.h" rci_t mzed_ple_naive(mzed_t *A, mzp_t *P, mzp_t *Q) { rci_t col_pos = 0; rci_t row_pos = 0; word tmp = 0; const gf2e *ff = A->finite_field; rci_t i,j; int found = 0; while (row_pos < A->nrows && col_pos < A->ncols) { found = 0; for(j=col_pos; jncols; j++) { for(i=row_pos; inrows; i++) { if( (tmp = mzed_read_elem(A, i,j)) != 0) { found = 1; break; } } if (found) break; } if (found) { P->values[row_pos] = i; Q->values[row_pos] = j; mzed_row_swap(A, row_pos, i); if(j+1 < A->ncols) { mzed_rescale_row(A, row_pos, j+1, gf2e_inv(ff, tmp)); for(rci_t l=row_pos+1; lnrows; l++) { if ((tmp = mzed_read_elem(A,l,j))) mzed_add_multiple_of_row(A, l, A, row_pos, tmp, j+1); } } row_pos++; col_pos = j + 1; } else { break; } } for (rci_t i = row_pos; i < A->nrows; ++i) P->values[i] = i; for (rci_t i = row_pos; i < A->ncols; ++i) Q->values[i] = i; for (rci_t i=0; i < row_pos; i++) { mzed_col_swap_in_rows(A, i, Q->values[i], i, A->nrows); } return row_pos; } rci_t _mzed_ple(mzed_t *A, mzp_t *P, mzp_t *Q, rci_t cutoff) { if (cutoff == 0) cutoff = __M4RIE_PLE_CUTOFF; if ((A->ncols > m4ri_radix && (gf2e_degree_to_w(A->finite_field) * A->ncols * A->nrows) > cutoff)) { mzd_slice_t *a = mzed_slice(NULL, A); rci_t r = _mzd_slice_ple(a, P, Q, cutoff); mzed_cling(A, a); mzd_slice_free(a); return r; } else { return mzed_ple_newton_john(A, P, Q); } } rci_t _mzd_slice_ple(mzd_slice_t *A, mzp_t *P, mzp_t *Q, rci_t cutoff) { assert(A->x[0]->offset == 0); const rci_t ncols = A->ncols; const rci_t nrows = A->nrows; if (cutoff == 0) cutoff = __M4RIE_PLE_CUTOFF; if (ncols <= m4ri_radix || (gf2e_degree_to_w(A->finite_field) * A->ncols * A->nrows) <= cutoff) { mzed_t *Abar = mzed_cling(NULL, A); rci_t r = mzed_ple_newton_john(Abar, P, Q); mzed_slice(A, Abar); 
mzed_free(Abar); return r; } /* n1 * ------------------------------------------ * | A0 | A1 | * ------------------------------------------ */ rci_t n1 = (((ncols - 1) / m4ri_radix + 1) >> 1) * m4ri_radix; mzd_slice_t *A0 = mzd_slice_init_window(A, 0, 0, nrows, n1); mzd_slice_t *A1 = mzd_slice_init_window(A, 0, n1, nrows, ncols); mzp_t *P1 = mzp_init_window(P, 0, nrows); mzp_t *Q1 = mzp_init_window(Q, 0, A0->ncols); rci_t r1 = _mzd_slice_ple(A0, P1, Q1, cutoff); /* r1 n1 * ------------------------------------------ * | A00 | | A01 | * r1------------------------------------------ * | A10 | | A11 | * ------------------------------------------ */ mzd_slice_t *A00 = mzd_slice_init_window(A, 0, 0, r1, r1); mzd_slice_t *A10 = mzd_slice_init_window(A, r1, 0, nrows, r1); mzd_slice_t *A01 = mzd_slice_init_window(A, 0, n1, r1, ncols); mzd_slice_t *A11 = mzd_slice_init_window(A, r1, n1, nrows, ncols); if (r1) { /* Computation of the Schur complement */ mzd_slice_apply_p_left(A1, P1); mzd_slice_trsm_lower_left(A00, A01); mzd_slice_addmul(A11, A10, A01); } mzp_free_window(P1); mzp_free_window(Q1); mzp_t *P2 = mzp_init_window(P, r1, nrows); mzp_t *Q2 = mzp_init_window(Q, n1, ncols); rci_t r2 = _mzd_slice_ple(A11, P2, Q2, cutoff); /* n * ------------------- * | A0b | * r1----------------- * | A1b | * ------------------- */ /* Update A10 */ mzd_slice_apply_p_left(A10, P2); /* Update P */ for (rci_t i = 0; i < nrows - r1; ++i) P2->values[i] += r1; /* Update the A0b block (permutation + rotation) */ for(rci_t i=0, j=n1; j < ncols; ++i, ++j) Q2->values[i] += n1; for(rci_t i=n1, j = r1; i < n1 + r2; ++i, ++j) Q->values[j] = Q->values[i]; _mzd_slice_compress_l(A, r1, n1, r2); mzp_free_window(Q2); mzp_free_window(P2); mzd_slice_free_window(A0); mzd_slice_free_window(A1); mzd_slice_free_window(A00); mzd_slice_free_window(A01); mzd_slice_free_window(A10); mzd_slice_free_window(A11); return r1 + r2; } rci_t _mzd_slice_pluq(mzd_slice_t *A, mzp_t *P, mzp_t *Q, rci_t cutoff) { rci_t r = 
_mzd_slice_ple(A, P, Q, cutoff); if(r && r < A->nrows) { mzd_slice_t *A0 = mzd_slice_init_window(A, 0, 0, r, A->ncols); mzd_slice_apply_p_right_trans_tri(A0, Q); mzd_slice_free_window(A0); } else { mzd_slice_apply_p_right_trans_tri(A, Q); } return r; } libm4rie-20130416/src/ple.h000066400000000000000000000146751212302364300152140ustar00rootroot00000000000000/** * \file ple.h * \brief PLE decomposition: \f$ L \cdot E = P \cdot A\f$. * * \author Martin Albrecht */ #ifndef M4RIE_PLE_H #define M4RIE_PLE_H /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2011 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include #include #include #include /** * \brief PLE decomposition: \f$ L \cdot E = P \cdot A \f$. * * Modifies A in place to store lower triangular L below (and on) the * main diagonal and E -- an echelon form of A -- above the main * diagonal (pivots are stored in Q). P and Q are updated with row and * column permutations respectively. * * This function uses naive cubic PLE decomposition depending on the * size of the underlying field. * * \param A Matrix * \param P Permutation vector of length A->nrows * \param Q Permutation vector of length A->ncols * * \ingroup PLE * * \sa mzed_ple_newton_john() mzed_ple() */ rci_t mzed_ple_naive(mzed_t *A, mzp_t *P, mzp_t *Q); /** * \brief PLE decomposition: \f$ L \cdot E = P \cdot A \f$. 
* * Modifies A in place to store lower triangular L below (and on) the * main diagonal and E -- an echelon form of A -- above the main * diagonal (pivots are stored in Q). P and Q are updated with row and * column permutations respectively. * * This function uses either asymptotically fast PLE decomposition by * reducing it to matrix multiplication or naive cubic PLE * decomposition depending on the size of the underlying field. If * asymptotically fast PLE decomposition is used, then the algorithm * switches to mzed_ple_newton_john if e * ncols * nrows is <= cutoff * where e is the exponent of the finite field. * * \param A Matrix * \param P Permutation vector of length A->nrows * \param Q Permutation vector of length A->ncols * \param cutoff Integer * * \ingroup PLE * * \sa mzed_ple_naive() mzed_ple_newton_john() mzed_ple() */ rci_t _mzd_slice_ple(mzd_slice_t *A, mzp_t *P, mzp_t *Q, rci_t cutoff); /** * \brief PLE decomposition: \f$ L \cdot E = P \cdot A \f$. * * Modifies A in place to store lower triangular L below (and on) the * main diagonal and E -- an echelon form of A -- above the main * diagonal (pivots are stored in Q). P and Q are updated with row and * column permutations respectively. * * This function implements asymptotically fast PLE decomposition by * reducing it to matrix multiplication. * * \param A Matrix * \param P Permutation vector of length A->nrows * \param Q Permutation vector of length A->ncols * * \ingroup PLE * * \sa mzed_ple_naive() mzed_ple_newton_john() _mzd_slice_ple() */ static inline rci_t mzd_slice_ple(mzd_slice_t *A, mzp_t *P, mzp_t *Q) { assert(P->length == A->nrows); assert(Q->length == A->ncols); return _mzd_slice_ple(A, P, Q, 0); } /** * \brief PLUQ decomposition: \f$ L \cdot U \cdot Q = P \cdot A\f$. * * This function implements asymptotically fast PLE decomposition by * reducing it to matrix multiplication. From PLE the PLUQ * decomposition is then obtained. 
* * \param A Matrix * \param P Permutation vector of length A->nrows * \param Q Permutation vector of length A->ncols * \param cutoff Crossover to base case if mzed_t::w * mzed_t::ncols * mzed_t::nrows < cutoff. * * \ingroup PLE */ rci_t _mzd_slice_pluq(mzd_slice_t *A, mzp_t *P, mzp_t *Q, rci_t cutoff); /** * \brief PLUQ decomposition: \f$ L \cdot U \cdot Q = P \cdot A\f$. * * This function implements asymptotically fast PLE decomposition by * reducing it to matrix multiplication. From PLE the PLUQ * decomposition is then obtained. * * \param A Matrix * \param P Permutation vector of length A->nrows * \param Q Permutation vector of length A->ncols * * \ingroup PLE */ static inline rci_t mzd_slice_pluq(mzd_slice_t *A, mzp_t *P, mzp_t *Q) { assert(P->length == A->nrows); assert(Q->length == A->ncols); return _mzd_slice_pluq(A, P, Q, 0); } /** * \brief PLE decomposition: \f$ L \cdot E = P \cdot A \f$. * * Modifies A in place to store lower triangular L below (and on) the * main diagonal and E -- an echelon form of A -- above the main * diagonal (pivots are stored in Q). P and Q are updated with row and * column permutations respectively. * * This function uses either asymptotically fast PLE decomposition by * reducing it to matrix multiplication or naive cubic PLE * decomposition depending on the size of the underlying field. If * asymptotically fast PLE decomposition is used, then the algorithm * switches to mzed_ple_newton_john if e * ncols * nrows is <= cutoff * where e is the exponent of the finite field. * * \param A Matrix * \param P Permutation vector of length A->nrows * \param Q Permutation vector of length A->ncols * \param cutoff Integer >= 0 * * \ingroup PLE * * \sa mzed_ple_naive() mzed_ple_newton_john() _mzed_ple() */ rci_t _mzed_ple(mzed_t *A, mzp_t *P, mzp_t *Q, rci_t cutoff); /** * Default crossover to PLE base case (Newton-John based). 
*/ #define __M4RIE_PLE_CUTOFF (__M4RI_CPU_L2_CACHE<<2) /** * \brief PLE decomposition: \f$ L \cdot E = P \cdot A \f$. * * Modifies A in place to store lower triangular L below (and on) the * main diagonal and E -- an echelon form of A -- above the main * diagonal (pivots are stored in Q). P and Q are updated with row and * column permutations respectively. * * This function uses either asymptotically fast PLE decomposition by * reducing it to matrix multiplication or naive cubic PLE * decomposition depending on the size of the underlying field. * * \param A Matrix * \param P Permutation vector of length A->nrows * \param Q Permutation vector of length A->ncols * * \ingroup PLE * * \sa mzed_ple_naive() mzed_ple_newton_john() _mzed_ple() * */ static inline rci_t mzed_ple(mzed_t *A, mzp_t *P, mzp_t *Q) { return _mzed_ple(A, P, Q, __M4RIE_PLE_CUTOFF); } #endif //M4RIE_PLE_H libm4rie-20130416/src/strassen.c000066400000000000000000000311371212302364300162610ustar00rootroot00000000000000/****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include "config.h" #include #include #include #include "mzed.h" #include "newton_john.h" #include "mzd_slice.h" #include "strassen.h" #define CLOSER(a,b,target) (abs((long)a-(long)target)x->offset == 0); assert(A->x->offset == 0); assert(B->x->offset == 0); if((C->nrows| C->ncols) == 0) return C; rci_t m = A->nrows; rci_t k = A->ncols; rci_t n = B->ncols; /* handle case first, where the input matrices are too small already */ if (CLOSER(m, m/2, cutoff) || CLOSER(k, k/2, cutoff) || CLOSER(n, n/2, cutoff)) { /* we copy the matrix first since it is only constant memory overhead and improves data locality, if you remove it make sure there are no speed regressions */ /* C = _mzd_mul_m4rm(C, A, B, 0, TRUE); */ mzed_t *Cbar = mzed_init(C->finite_field, C->nrows, C->ncols); _mzed_mul_newton_john(Cbar, A, B); mzed_copy(C, Cbar); mzed_free(Cbar); return C; } rci_t mmm = m/2; rci_t kkk = k/2; rci_t nnn = n/2; mmm = (mmm - mmm%(m4ri_radix/A->w)); kkk = (kkk - kkk%(m4ri_radix/A->w)); nnn = (nnn - nnn%(m4ri_radix/A->w)); /* |A | |B | |C | * Compute | | x | | = | | */ mzed_t *A11 = mzed_init_window(A, 0, 0, mmm, kkk); mzed_t *A12 = mzed_init_window(A, 0, kkk, mmm, 2*kkk); mzed_t *A21 = mzed_init_window(A, mmm, 0, 2*mmm, kkk); mzed_t *A22 = mzed_init_window(A, mmm, kkk, 2*mmm, 2*kkk); mzed_t *B11 = mzed_init_window(B, 0, 0, kkk, nnn); mzed_t *B12 = mzed_init_window(B, 0, nnn, kkk, 2*nnn); mzed_t *B21 = mzed_init_window(B, kkk, 0, 2*kkk, nnn); mzed_t *B22 = mzed_init_window(B, kkk, nnn, 2*kkk, 2*nnn); mzed_t *C11 = mzed_init_window(C, 0, 0, mmm, nnn); mzed_t *C12 = mzed_init_window(C, 0, nnn, mmm, 2*nnn); mzed_t *C21 = mzed_init_window(C, mmm, 0, 2*mmm, nnn); mzed_t *C22 = mzed_init_window(C, mmm, nnn, 2*mmm, 2*nnn); /** * \note See Marco Bodrato; "A Strassen-like Matrix Multiplication * Suited for Squaring and Highest Power 
Computation"; * http://bodrato.it/papres/#CIVV2008 for reference on the used * sequence of operations. */ /* change this to mzd_init(mmm, MAX(nnn,kkk)) to fix the todo below */ mzed_t *Wmk = mzed_init(A->finite_field, mmm, kkk); mzed_t *Wkn = mzed_init(A->finite_field, kkk, nnn); _mzed_add(Wkn, B22, B12); /* Wkn = B22 + B12 */ _mzed_add(Wmk, A22, A12); /* Wmk = A22 + A12 */ _mzed_mul_strassen(C21, Wmk, Wkn, cutoff); /* C21 = Wmk * Wkn */ _mzed_add(Wmk, A22, A21); /* Wmk = A22 - A21 */ _mzed_add(Wkn, B22, B21); /* Wkn = B22 - B21 */ _mzed_mul_strassen(C22, Wmk, Wkn, cutoff); /* C22 = Wmk * Wkn */ _mzed_add(Wkn, Wkn, B12); /* Wkn = Wkn + B12 */ _mzed_add(Wmk, Wmk, A12); /* Wmk = Wmk + A12 */ _mzed_mul_strassen(C11, Wmk, Wkn, cutoff); /* C11 = Wmk * Wkn */ _mzed_add(Wmk, Wmk, A11); /* Wmk = Wmk - A11 */ _mzed_mul_strassen(C12, Wmk, B12, cutoff); /* C12 = Wmk * B12 */ _mzed_add(C12, C12, C22); /* C12 = C12 + C22 */ mzed_free(Wmk); Wmk = mzed_mul_strassen(NULL, A12, B21, cutoff);/*Wmk = A12 * B21 */ _mzed_add(C11, C11, Wmk); /* C11 = C11 + Wmk */ _mzed_add(C12, C11, C12); /* C12 = C11 - C12 */ _mzed_add(C11, C21, C11); /* C11 = C21 - C11 */ _mzed_add(Wkn, Wkn, B11); /* Wkn = Wkn - B11 */ _mzed_mul_strassen(C21, A21, Wkn, cutoff); /* C21 = A21 * Wkn */ mzed_free(Wkn); _mzed_add(C21, C11, C21); /* C21 = C11 - C21 */ _mzed_add(C22, C22, C11); /* C22 = C22 + C11 */ _mzed_mul_strassen(C11, A11, B11, cutoff); /* C11 = A11 * B11 */ _mzed_add(C11, C11, Wmk); /* C11 = C11 + Wmk */ /* clean up */ mzed_free_window(A11); mzed_free_window(A12); mzed_free_window(A21); mzed_free_window(A22); mzed_free_window(B11); mzed_free_window(B12); mzed_free_window(B21); mzed_free_window(B22); mzed_free_window(C11); mzed_free_window(C12); mzed_free_window(C21); mzed_free_window(C22); mzed_free(Wmk); /* deal with rest */ nnn*=2; if (n > nnn) { /* |AA| | B| | C| * Compute |AA| x | B| = | C| */ mzed_t *B_last_col = mzed_init_window(B, 0, nnn, k, n); mzed_t *C_last_col = mzed_init_window(C, 0, nnn, 
m, n); mzed_set_ui(C_last_col, 0); _mzed_mul_newton_john(C_last_col, A, B_last_col); mzed_free_window(B_last_col); mzed_free_window(C_last_col); } mmm*=2; if (m > mmm) { /* | | |B | | | * Compute |AA| x |B | = |C | */ mzed_t *A_last_row = mzed_init_window(A, mmm, 0, m, k); mzed_t *B_first_col= mzed_init_window(B, 0, 0, k, nnn); mzed_t *C_last_row = mzed_init_window(C, mmm, 0, m, nnn); mzed_set_ui(C_last_row, 0); _mzed_mul_newton_john(C_last_row, A_last_row, B_first_col); mzed_free_window(A_last_row); mzed_free_window(B_first_col); mzed_free_window(C_last_row); } kkk*=2; if (k > kkk) { /* Add to | | | B| |C | * result |A | x | | = | | */ mzed_t *A_last_col = mzed_init_window(A, 0, kkk, mmm, k); mzed_t *B_last_row = mzed_init_window(B, kkk, 0, k, nnn); mzed_t *C_bulk = mzed_init_window(C, 0, 0, mmm, nnn); _mzed_mul_newton_john(C_bulk, A_last_col, B_last_row); mzed_free_window(A_last_col); mzed_free_window(B_last_row); mzed_free_window(C_bulk); } return C; } mzed_t *_mzed_addmul_strassen(mzed_t *C, const mzed_t *A, const mzed_t *B, int cutoff) { assert(C->x->offset == 0); assert(A->x->offset == 0); assert(B->x->offset == 0); if((C->nrows| C->ncols) == 0) return C; rci_t m = A->nrows; rci_t k = A->ncols; rci_t n = B->ncols; /* handle case first, where the input matrices are too small already */ if (CLOSER(m, m/2, cutoff) || CLOSER(k, k/2, cutoff) || CLOSER(n, n/2, cutoff)) { /* we copy the matrix first since it is only constant memory overhead and improves data locality, if you remove it make sure there are no speed regressions */ /* C = _mzd_mul_m4rm(C, A, B, 0, TRUE); */ mzed_t *Cbar = mzed_copy(NULL, C); _mzed_mul_newton_john(Cbar, A, B); mzed_copy(C, Cbar); mzed_free(Cbar); return C; } rci_t mmm = m/2; rci_t kkk = k/2; rci_t nnn = n/2; mmm = (mmm - mmm%(m4ri_radix/A->w)); kkk = (kkk - kkk%(m4ri_radix/A->w)); nnn = (nnn - nnn%(m4ri_radix/A->w)); /* |A | |B | |C | * Compute | | x | | = | | */ mzed_t *A11 = mzed_init_window(A, 0, 0, mmm, kkk); mzed_t *A12 = 
mzed_init_window(A, 0, kkk, mmm, 2*kkk); mzed_t *A21 = mzed_init_window(A, mmm, 0, 2*mmm, kkk); mzed_t *A22 = mzed_init_window(A, mmm, kkk, 2*mmm, 2*kkk); mzed_t *B11 = mzed_init_window(B, 0, 0, kkk, nnn); mzed_t *B12 = mzed_init_window(B, 0, nnn, kkk, 2*nnn); mzed_t *B21 = mzed_init_window(B, kkk, 0, 2*kkk, nnn); mzed_t *B22 = mzed_init_window(B, kkk, nnn, 2*kkk, 2*nnn); mzed_t *C11 = mzed_init_window(C, 0, 0, mmm, nnn); mzed_t *C12 = mzed_init_window(C, 0, nnn, mmm, 2*nnn); mzed_t *C21 = mzed_init_window(C, mmm, 0, 2*mmm, nnn); mzed_t *C22 = mzed_init_window(C, mmm, nnn, 2*mmm, 2*nnn); /** * \note See Marco Bodrato; "A Strassen-like Matrix Multiplication * Suited for Squaring and Highest Power Computation"; * http://bodrato.it/papres/#CIVV2008 for reference on the used * sequence of operations. */ mzed_t *S = mzed_init(A->finite_field, mmm, kkk); mzed_t *T = mzed_init(A->finite_field, kkk, nnn); mzed_t *U = mzed_init(A->finite_field, mmm, nnn); _mzed_add(S, A22, A21); /* 1 S = A22 - A21 */ _mzed_add(T, B22, B21); /* 2 T = B22 - B21 */ _mzed_mul_strassen(U, S, T, cutoff); /* 3 U = S*T */ _mzed_add(C22, U, C22); /* 4 C22 = U + C22 */ _mzed_add(C12, U, C12); /* 5 C12 = U + C12 */ _mzed_mul_strassen(U, A12, B21, cutoff); /* 8 U = A12*B21 */ _mzed_add(C11, U, C11); /* 9 C11 = U + C11 */ _mzed_addmul_strassen(C11, A11, B11, cutoff); /* 11 C11 = A11*B11 + C11 */ _mzed_add(S, S, A12); /* 6 S = S - A12 */ _mzed_add(T, T, B12); /* 7 T = T - B12 */ _mzed_addmul_strassen(U, S, T, cutoff); /* 10 U = S*T + U */ _mzed_add(C12, C12, U); /* 15 C12 = U + C12 */ _mzed_add(S, A11, S); /* 12 S = A11 - S */ _mzed_addmul_strassen(C12, S, B12, cutoff); /* 14 C12 = S*B12 + C12 */ _mzed_add(T, B11, T); /* 13 T = B11 - T */ _mzed_addmul_strassen(C21, A21, T, cutoff); /* 16 C21 = A21*T + C21 */ _mzed_add(S, A22, A12); /* 17 S = A22 + A21 */ _mzed_add(T, B22, B12); /* 18 T = B22 + B21 */ _mzed_addmul_strassen(U, S, T, cutoff); /* 19 U = U - S*T */ _mzed_add(C21, C21, U); /* 20 C21 = C21 - U3 
*/ _mzed_add(C22, C22, U); /* 21 C22 = C22 - U3 */ /* clean up */ mzed_free_window(A11); mzed_free_window(A12); mzed_free_window(A21); mzed_free_window(A22); mzed_free_window(B11); mzed_free_window(B12); mzed_free_window(B21); mzed_free_window(B22); mzed_free_window(C11); mzed_free_window(C12); mzed_free_window(C21); mzed_free_window(C22); mzed_free(S); mzed_free(T); mzed_free(U); /* deal with rest */ nnn*=2; if (n > nnn) { /* |AA| | B| | C| * Compute |AA| x | B| = | C| */ mzed_t const *B_last_col = mzed_init_window(B, 0, nnn, k, n); mzed_t *C_last_col = mzed_init_window(C, 0, nnn, m, n); _mzed_mul_newton_john(C_last_col, A, B_last_col); mzed_free_window((mzed_t*)B_last_col); mzed_free_window((mzed_t*)C_last_col); } mmm*=2; if (m > mmm) { /* | | |B | | | * Compute |AA| x |B | = |C | */ mzed_t const *A_last_row = mzed_init_window(A, mmm, 0, m, k); mzed_t const *B_first_col= mzed_init_window(B, 0, 0, k, nnn); mzed_t *C_last_row = mzed_init_window(C, mmm, 0, m, nnn); _mzed_mul_newton_john(C_last_row, A_last_row, B_first_col); mzed_free_window((mzed_t*)A_last_row); mzed_free_window((mzed_t*)B_first_col); mzed_free_window(C_last_row); } kkk*=2; if (k > kkk) { /* Add to | | | B| |C | * result |A | x | | = | | */ mzed_t const *A_last_col = mzed_init_window(A, 0, kkk, mmm, k); mzed_t const *B_last_row = mzed_init_window(B, kkk, 0, k, nnn); mzed_t *C_bulk = mzed_init_window(C, 0, 0, mmm, nnn); _mzed_mul_newton_john(C_bulk, A_last_col, B_last_row); mzed_free_window((mzed_t*)A_last_col); mzed_free_window((mzed_t*)B_last_row); mzed_free_window(C_bulk); } return C; } rci_t _mzed_strassen_cutoff(const mzed_t *C, const mzed_t *A, const mzed_t *B) { rci_t cutoff; /* it seems most of it is cache bound: 2 matrix * (n^2 *w / 8 ) <= L2 */ switch(A->finite_field->degree) { case 2: cutoff = MIN(((int)sqrt((double)(4*__M4RI_CPU_L2_CACHE)))/2,4096); break; case 3: case 4: case 5: case 6: case 7: case 8: cutoff = MIN(((int)sqrt((double)(4*__M4RI_CPU_L2_CACHE/A->w))),4096); break; case 9: 
/* on redhawk 2048 is much better, sage.math 1204 wins **/ cutoff = 2048; break; case 10: case 11: case 12: case 13: case 14: case 15: case 16: cutoff = 4096; break; default: cutoff = 1024; break; } if (cutoff < 2*__M4RI_TWOPOW(A->finite_field->degree)) cutoff = 2*__M4RI_TWOPOW(A->finite_field->degree); return cutoff; } libm4rie-20130416/src/strassen.h000066400000000000000000000067511212302364300162720ustar00rootroot00000000000000/** * \file strassen.h * \brief Strassen-Winograd multiplication for mzed_t * \author Martin Albrecht */ #ifndef M4RIE_STRASSEN_H #define M4RIE_STRASSEN_H /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ /** * \brief \f$ C = A \cdot B \f$ using Strassen-Winograd. * * This function uses Strassen-Winograd multiplication (Bodrato * variant) recursively until it reaches the cutoff, where it switches * to Newton-John table based multiplication or naive multiplication. * * \param C Preallocated product matrix, may be NULL for allocation. * \param A Input matrix A. * \param B Input matrix B. * \param cutoff Crossover to basecase dimension > 64 or 0 for heuristic choice * * \ingroup Multiplication */ mzed_t *mzed_mul_strassen(mzed_t *C, const mzed_t *A, const mzed_t *B, int cutoff); /** * \brief \f$ C = C + A \cdot B \f$ using Strassen-Winograd. 
*
 * This function uses Strassen-Winograd multiplication (Bodrato
 * variant) recursively until it reaches the cutoff, where it switches
 * to Newton-John table based multiplication or naive multiplication.
 *
 * \param C Preallocated product matrix, may be NULL for allocation.
 * \param A Input matrix A.
 * \param B Input matrix B.
 * \param cutoff Crossover to basecase dimension > 64 or 0 for heuristic choice.
 *
 * \ingroup Multiplication
 */
mzed_t *mzed_addmul_strassen(mzed_t *C, const mzed_t *A, const mzed_t *B, int cutoff);

/**
 * \brief \f$ C = A \cdot B \f$ using Strassen-Winograd.
 *
 * This function uses Strassen-Winograd multiplication (Bodrato
 * variant) recursively until it reaches the cutoff, where it switches
 * to Newton-John table based multiplication or naive multiplication.
 *
 * \param C Preallocated product matrix.
 * \param A Input matrix A.
 * \param B Input matrix B.
 * \param cutoff Crossover to basecase dimension > 64
 *
 * \ingroup Multiplication
 *
 */
mzed_t *_mzed_mul_strassen(mzed_t *C, const mzed_t *A, const mzed_t *B, int cutoff);

/**
 * \brief \f$ C = C + A \cdot B \f$ using Strassen-Winograd.
 *
 * This function uses Strassen-Winograd multiplication (Bodrato
 * variant) recursively until it reaches the cutoff, where it switches
 * to Newton-John table based multiplication or naive multiplication.
 *
 * \param C Preallocated product matrix.
 * \param A Input matrix A.
 * \param B Input matrix B.
 * \param cutoff Crossover to basecase dimension > 64
 *
 * \ingroup Multiplication
 */
mzed_t *_mzed_addmul_strassen(mzed_t *C, const mzed_t *A, const mzed_t *B, int cutoff);

/**
 * \brief Return heuristic choice for crossover parameter for Strassen-Winograd multiplication given A, B and C.
* * \param C Matrix (ignored) * \param A Matrix * \param B Martix (ignored) * * \ingroup Multiplication */ rci_t _mzed_strassen_cutoff(const mzed_t *C, const mzed_t *A, const mzed_t *B); #endif //M4RIE_STRASSEN_H libm4rie-20130416/src/trsm.c000066400000000000000000000037511212302364300154050ustar00rootroot00000000000000#include "trsm.h" #include "newton_john.h" #include "conversion.h" void mzed_trsm_upper_left_naive(const mzed_t *U, mzed_t *B) { assert(U->finite_field == B->finite_field); assert(U->nrows == U->ncols); assert(B->nrows == U->ncols); const gf2e *ff = U->finite_field; for(int i=B->nrows-1; i>=0; i--) { for(rci_t k=i+1; knrows; k++) { mzed_add_multiple_of_row(B, i, B, k, mzed_read_elem(U, i, k), 0); } mzed_rescale_row(B, i, 0, gf2e_inv(ff, mzed_read_elem(U, i, i))); } } void mzed_trsm_lower_left_naive(const mzed_t *L, mzed_t *B) { assert(L->finite_field == B->finite_field); assert(L->nrows == L->ncols); assert(B->nrows == L->ncols); const gf2e *ff = L->finite_field; for(rci_t i=0; inrows; i++) { for(rci_t k=0; kfinite_field == B->finite_field); assert(U->nrows == U->ncols); assert(B->nrows == U->ncols); const mzed_t *Ue = mzed_cling(NULL, U); mzed_t *BB = mzed_init(B->finite_field, B->nrows, B->ncols + B->x[0]->offset); mzed_t *Be = mzed_init_window(BB, 0, B->x[0]->offset, B->nrows, B->ncols+ B->x[0]->offset); mzed_cling(Be, B); mzed_trsm_upper_left_naive(Ue, Be); mzed_slice(B, Be); mzed_free((mzed_t*)Ue); mzed_free(BB); mzed_free_window(Be); } void mzd_slice_trsm_lower_left_naive(const mzd_slice_t *L, mzd_slice_t *B) { assert(L->finite_field == B->finite_field); assert(L->nrows == L->ncols); assert(B->nrows == L->ncols); const mzed_t *Le = mzed_cling(NULL, L); mzed_t *Be = mzed_cling(NULL, B); mzed_trsm_lower_left_naive(Le, Be); mzed_slice(B, Be); mzed_free((mzed_t*)Le); mzed_free(Be); } #include "mzed_intro.inl" #include "trsm.inl" #include "mzed_outro.inl" #include "mzd_slice_intro.inl" #include "trsm.inl" #include "mzd_slice_outro.inl" 
libm4rie-20130416/src/trsm.h000066400000000000000000000103201212302364300154000ustar00rootroot00000000000000/** * \file trsm.h * \brief Triangular System Solving with Matrices (TRSM). * * \author Martin Albrecht */ #ifndef TRSM_H #define TRSM_H /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2011 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include #include #define MZED_TRSM_CUTOFF 512 /**< Crossover dimension to TRSM base cases */ /** * \brief \f$B = U^{-1} \cdot B\f$ * * \param U Upper-triangular matrix (other entries are ignored). * \param B Matrix. * \param cutoff Crossover dimension to base case. * * \ingroup Triangular */ void _mzed_trsm_upper_left(const mzed_t *U, mzed_t *B, const rci_t cutoff); /** * \brief \f$B = U^{-1} \cdot B\f$ * * \param U Upper-triangular matrix (other entries are ignored). * \param B Matrix. * * \ingroup Triangular */ void mzed_trsm_upper_left_naive(const mzed_t *U, mzed_t *B); /** * \brief \f$B = U^{-1} \cdot B\f$ * * \param U Upper-triangular matrix (other entries are ignored). * \param B Matrix. * * \ingroup Triangular */ static inline void mzed_trsm_upper_left(const mzed_t *U, mzed_t *B) { _mzed_trsm_upper_left(U, B, MZED_TRSM_CUTOFF); } /** * \brief \f$B = U^{-1} \cdot B\f$ * * \param U Upper-triangular matrix (other entries are ignored). * \param B Matrix. * \param cutoff Crossover dimension to base case. 
* * \ingroup Triangular */ void _mzd_slice_trsm_upper_left(const mzd_slice_t *U, mzd_slice_t *B, const rci_t cutoff); /** * \brief \f$B = U^{-1} \cdot B\f$ * * \param U Upper-triangular matrix (other entries are ignored). * \param B Matrix. * * \ingroup Triangular */ void mzd_slice_trsm_upper_left_naive(const mzd_slice_t *U, mzd_slice_t *B); /** * \brief \f$B = U^{-1} \cdot B\f$ * * \param U Upper-triangular matrix (other entries are ignored). * \param B Matrix. * * \ingroup Triangular */ static inline void mzd_slice_trsm_upper_left(const mzd_slice_t *U, mzd_slice_t *B) { _mzd_slice_trsm_upper_left(U, B, MZED_TRSM_CUTOFF); } /** * \brief \f$B = L^{-1} \cdot B\f$ * * \param L Lower-triangular matrix (other entries are ignored). * \param B Matrix. * \param cutoff Crossover dimension to base case. * * \ingroup Triangular */ void _mzed_trsm_lower_left(const mzed_t *L, mzed_t *B, const rci_t cutoff); /** * \brief \f$B = L^{-1} \cdot B\f$ * * \param L Lower-triangular matrix (other entries are ignored). * \param B Matrix. * * \ingroup Triangular */ void mzed_trsm_lower_left_naive(const mzed_t *L, mzed_t *B); /** * \brief \f$B = L^{-1} \cdot B\f$ * * \param L Lower-triangular matrix (other entries are ignored). * \param B Matrix. * * \ingroup Triangular */ static inline void mzed_trsm_lower_left(const mzed_t *L, mzed_t *B) { _mzed_trsm_lower_left(L, B, MZED_TRSM_CUTOFF); } /** * \brief \f$B = L^{-1} \cdot B\f$ * * \param L Lower-triangular matrix (other entries are ignored). * \param B Matrix. * \param cutoff Crossover dimension to base case. * * \ingroup Triangular */ void _mzd_slice_trsm_lower_left(const mzd_slice_t *L, mzd_slice_t *B, const rci_t cutoff); /** * \brief \f$B = L^{-1} \cdot B\f$ * * \param L Lower-triangular matrix (other entries are ignored). * \param B Matrix. 
* * \ingroup Triangular */ void mzd_slice_trsm_lower_left_naive(const mzd_slice_t *L, mzd_slice_t *B); /** * \brief \f$B = L^{-1} \cdot B\f$ * * \param L Lower-triangular matrix (other entries are ignored). * \param B Matrix. * * \ingroup Triangular */ static inline void mzd_slice_trsm_lower_left(const mzd_slice_t *L, mzd_slice_t *B) { _mzd_slice_trsm_lower_left(L, B, MZED_TRSM_CUTOFF); } #endif //TRSM_H libm4rie-20130416/src/trsm.inl000066400000000000000000000055221212302364300157430ustar00rootroot00000000000000/** * \brief inline template for TRSM routines * \author Martin Albrecht * * \note We want to keep this library in C, hence we cannot use of C++ * templates. */ void _matrix_trsm_lower_left(const matrix_t *L, matrix_t *B, const rci_t cutoff) { assert((L->finite_field == B->finite_field) && (L->nrows == L->ncols) && (B->nrows == L->ncols)); if (L->nrows <= cutoff || B->ncols <= cutoff) { matrix_trsm_lower_left_newton_john(L,B); return; } /** \verbatim |\ ______ | \ | | | \ | B0 | |L00\ | | |____\ |______| | |\ | | | | \ | | | | \ | B1 | |L10 |L11\ | | |____|____\ |______| \endverbatim */ rci_t c = L->nrows/2; c = MAX((c - c%m4ri_radix),m4ri_radix); matrix_t *B0 = matrix_init_window(B, 0, 0, c, B->ncols); matrix_t *B1 = matrix_init_window(B, c, 0, B->nrows, B->ncols); const matrix_t *L00 = (const matrix_t*)matrix_init_window((matrix_t*)L, 0, 0, c, c); const matrix_t *L10 = (const matrix_t*)matrix_init_window((matrix_t*)L, c, 0, B->nrows, c); const matrix_t *L11 = (const matrix_t*)matrix_init_window((matrix_t*)L, c, c, B->nrows, B->nrows); _matrix_trsm_lower_left(L00, B0, cutoff); matrix_addmul(B1, L10, B0); _matrix_trsm_lower_left(L11, B1, cutoff); matrix_free_window(B0); matrix_free_window(B1); matrix_free_window((matrix_t*)L00); matrix_free_window((matrix_t*)L10); matrix_free_window((matrix_t*)L11); } void _matrix_trsm_upper_left(matrix_t const *U, matrix_t *B, const rci_t cutoff) { assert((U->finite_field == B->finite_field) && (U->nrows == U->ncols) && 
(B->nrows == U->ncols)); if (U->nrows <= cutoff || B->ncols <= cutoff) { matrix_trsm_upper_left_newton_john(U,B); return; } /** \verbatim __________ ______ \ U00| | | | \ |U01 | | | \ | | | B0 | \ | | | | \|____| |______| \ | | | \U11| | | \ | | B1 | \ | | | \| |______| \endverbatim */ rci_t c = U->nrows/2; c = MAX((c - c%m4ri_radix),m4ri_radix); matrix_t *B0 = matrix_init_window(B, 0, 0, c, B->ncols); matrix_t *B1 = matrix_init_window(B, c, 0, B->nrows, B->ncols); const matrix_t *U00 = (const matrix_t *)matrix_init_window(U, 0, 0, c, c); const matrix_t *U01 = (const matrix_t *)matrix_init_window(U, 0, c, c, B->nrows); const matrix_t *U11 = (const matrix_t *)matrix_init_window(U, c, c, B->nrows, B->nrows); _matrix_trsm_upper_left(U11, B1, cutoff); matrix_addmul(B0, U01, B1); _matrix_trsm_upper_left(U00, B0, cutoff); matrix_free_window(B0); matrix_free_window(B1); matrix_free_window((matrix_t*)U00); matrix_free_window((matrix_t*)U01); matrix_free_window((matrix_t*)U11); } libm4rie-20130416/tests/000077500000000000000000000000001212302364300146215ustar00rootroot00000000000000libm4rie-20130416/tests/test_elimination.cc000066400000000000000000000071461212302364300205070ustar00rootroot00000000000000/** * \file test_elimination.cc * \brief Test code for elimination routines * * \author Martin Albrecht */ /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010-2012 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include "testing.h" int test_equality(gf2e *ff, rci_t m, rci_t n) { int fail_ret = 0; mzed_t *A0 = random_mzed_t(ff, m, n); mzed_t *A1 = mzed_copy(NULL, A0); mzed_t *A2 = mzed_copy(NULL, A0); mzed_t *A3 = mzed_copy(NULL, A0); mzed_set_canary(A1); mzed_set_canary(A2); mzed_set_canary(A3); const rci_t r0 = mzed_echelonize_newton_john(A0,1); const rci_t r1 = mzed_echelonize_naive(A1,1); const rci_t r2 = mzed_echelonize(A2,1); const rci_t r3 = mzed_echelonize_ple(A3,1); m4rie_check( r0 == r1); m4rie_check( mzed_cmp(A0, A1) == 0); m4rie_check( r1 == r2); m4rie_check( mzed_cmp(A1, A2) == 0); m4rie_check( r2 == r3); m4rie_check( mzed_cmp(A2, A3) == 0); m4rie_check( r3 == r0); m4rie_check( mzed_cmp(A3, A0) == 0); m4rie_check( mzed_canary_is_alive(A0) ); m4rie_check( mzed_canary_is_alive(A1) ); m4rie_check( mzed_canary_is_alive(A2) ); m4rie_check( mzed_canary_is_alive(A3) ); mzed_free(A0); mzed_free(A1); mzed_free(A2); mzed_free(A3); return fail_ret; } int test_batch(gf2e *ff, rci_t m, rci_t n) { int fail_ret = 0; printf("elim: k: %2d, minpoly: 0x%05x m: %5d, n: %5d ",(int)ff->degree, (unsigned int)ff->minpoly, (int)m, (int)n); if(m == n) { m4rie_check( test_equality(ff, m, n) == 0); printf("."); fflush(0); printf(" "); } else { m4rie_check( test_equality(ff, m, n) == 0); printf("."); fflush(0); m4rie_check( test_equality(ff, n, m) == 0); printf("."); fflush(0); } if (fail_ret == 0) printf(" passed\n"); else printf(" FAILED\n"); return fail_ret; } int main(int argc, char **argv) { srandom(17); int runlong = parse_parameters(argc, argv); gf2e *ff; int fail_ret = 0; for(int k=2; k<=16; k++) { ff = gf2e_init(irreducible_polynomials[k][1]); fail_ret += test_batch(ff, 2, 5); fail_ret += test_batch(ff, 5, 10); fail_ret += test_batch(ff, 1, 1); fail_ret += test_batch(ff, 1, 2); fail_ret += test_batch(ff, 11, 12); fail_ret 
+= test_batch(ff, 21, 22); fail_ret += test_batch(ff, 13, 2); fail_ret += test_batch(ff, 32, 33); fail_ret += test_batch(ff, 63, 64); if (k <= 12 || runlong) { fail_ret += test_batch(ff, 127, 128); fail_ret += test_batch(ff, 200, 20); } fail_ret += test_batch(ff, 1, 1); fail_ret += test_batch(ff, 1, 3); fail_ret += test_batch(ff, 11, 13); fail_ret += test_batch(ff, 21, 23); fail_ret += test_batch(ff, 13, 90); fail_ret += test_batch(ff, 32, 34); fail_ret += test_batch(ff, 63, 65); if (k <= 12 || runlong) { fail_ret += test_batch(ff, 127, 129); fail_ret += test_batch(ff, 200, 112); fail_ret += test_batch(ff, 10, 200); } gf2e_free(ff); } return fail_ret; } libm4rie-20130416/tests/test_multiplication.cc000066400000000000000000000154401212302364300212300ustar00rootroot00000000000000/** * \file test_multiplication.cc * \brief Test code for multiplication routines * * \author Martin Albrecht */ /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010-2012 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include "testing.h" int test_addmul(gf2e *ff, rci_t m, rci_t n, rci_t l) { int fail_ret = 0; mzed_t *A = random_mzed_t(ff, m, l); mzed_t *B = random_mzed_t(ff, l, n); mzed_t *C0 = random_mzed_t(ff, m, n); mzed_t *C1 = mzed_copy(NULL, C0); mzed_t *C2 = mzed_copy(NULL, C0); mzed_t *C3 = mzed_copy(NULL, C0); mzed_t *C4 = mzed_copy(NULL, C0); mzed_set_canary(C1); mzed_set_canary(C2); mzed_set_canary(C3); mzed_set_canary(C4); mzed_addmul_newton_john(C0, A, B); mzed_addmul_naive(C1, A, B); mzed_addmul_strassen(C2, A, B, 64); mzed_addmul(C3, A, B); m4rie_check( mzed_cmp(C0, C1) == 0); m4rie_check( mzed_cmp(C1, C2) == 0); m4rie_check( mzed_cmp(C2, C3) == 0); mzed_addmul_karatsuba(C4, A, B); m4rie_check( mzed_cmp(C3, C4) == 0); m4rie_check( mzed_canary_is_alive(A) ); m4rie_check( mzed_canary_is_alive(B) ); m4rie_check( mzed_canary_is_alive(C1) ); m4rie_check( mzed_canary_is_alive(C2) ); m4rie_check( mzed_canary_is_alive(C3) ); m4rie_check( mzed_canary_is_alive(C4) ); mzed_free(A); mzed_free(B); mzed_free(C0); mzed_free(C1); mzed_free(C2); mzed_free(C3); mzed_free(C4); return fail_ret; } int test_mul(gf2e *ff, rci_t m, rci_t n, rci_t l) { int fail_ret = 0; const mzed_t *A = random_mzed_t(ff, m, l); const mzed_t *B = random_mzed_t(ff, l, n); mzed_t *C0 = random_mzed_t(ff, m, n); mzed_t *C1 = random_mzed_t(ff, m, n); mzed_t *C2 = random_mzed_t(ff, m, n); mzed_t *C3 = random_mzed_t(ff, m, n); mzed_t *C4 = random_mzed_t(ff, m, n); mzed_mul_newton_john(C0, A, B); mzed_mul_naive(C1, A, B); mzed_mul_strassen(C2, A, B, 64); mzed_mul(C3, A, B); m4rie_check( mzed_cmp(C0, C1) == 0); m4rie_check( mzed_cmp(C1, C2) == 0); m4rie_check( mzed_cmp(C2, C3) == 0); mzed_mul_karatsuba(C4, A, B); m4rie_check( mzed_cmp(C3, C4) == 0); m4rie_check( mzed_canary_is_alive((mzed_t*)A) ); m4rie_check( mzed_canary_is_alive((mzed_t*)B) ); 
m4rie_check( mzed_canary_is_alive(C1) ); m4rie_check( mzed_canary_is_alive(C2) ); m4rie_check( mzed_canary_is_alive(C3) ); mzed_free((mzed_t*)A); mzed_free((mzed_t*)B); mzed_free(C0); mzed_free(C1); mzed_free(C2); mzed_free(C3); mzed_free(C4); return fail_ret; } int test_scalar(gf2e *ff, rci_t m, rci_t n) { int fail_ret = 0; word a = random() & ((1<degree)-1); while (!a) a = random() & ((1<degree)-1); mzed_t *B = random_mzed_t(ff, m, n); mzed_t *C0 = mzed_init(ff, m, n); mzed_t *C1 = random_mzed_t(ff, m, n); mzed_t *C2 = NULL; C0 = mzed_mul_scalar(C0, a, B); C1 = mzed_mul_scalar(C1, a, B); C2 = mzed_mul_scalar(C2, a, B); m4rie_check( mzed_cmp(C0, C1) == 0); m4rie_check( mzed_cmp(C1, C2) == 0); mzed_t *C3 = NULL; mzd_slice_t *BB = mzed_slice(NULL, B); mzd_slice_t *CC = mzd_slice_mul_scalar(NULL, a, BB); C3 = mzed_cling(C3, CC); mzd_slice_free(BB); mzd_slice_free(CC); m4rie_check( mzed_cmp(C2, C3) == 0); mzed_free(C3); const word a_inv = gf2e_inv(ff, a); mzed_t *B0 = mzed_init(ff, m, n); mzed_t *B1 = random_mzed_t(ff, m, n); mzed_t *B2 = NULL; B0 = mzed_mul_scalar(B0, a_inv, C0); B1 = mzed_mul_scalar(B1, a_inv, C1); B2 = mzed_mul_scalar(B2, a_inv, C2); m4rie_check( mzed_cmp(B, B0) == 0); m4rie_check( mzed_cmp(B, B1) == 0); m4rie_check( mzed_cmp(B, B2) == 0); mzed_free(C0); mzed_free(C1); mzed_free(C2); mzed_free(B0); mzed_free(B1); mzed_free(B2); mzed_free(B); return fail_ret; } int test_batch(gf2e *ff, rci_t m, rci_t l, rci_t n) { int fail_ret = 0; printf("mul: k: %2d, minpoly: 0x%05x m: %5d, l: %5d, n: %5d ",(int)ff->degree, (unsigned int)ff->minpoly, (int)m, (int)l, (int)n); m4rie_check(test_scalar(ff, m, m) == 0); printf("."); fflush(0); m4rie_check(test_scalar(ff, l, l) == 0); printf("."); fflush(0); m4rie_check(test_scalar(ff, n, n) == 0); printf("."); fflush(0); m4rie_check(test_scalar(ff, m, l) == 0); printf("."); fflush(0); m4rie_check(test_scalar(ff, l, n) == 0); printf("."); fflush(0); m4rie_check(test_scalar(ff, m, n) == 0); printf("."); fflush(0); 
m4rie_check(test_scalar(ff, l, m) == 0); printf("."); fflush(0); if(m == l && m == n) { m4rie_check( test_mul(ff, m, l, n) == 0); printf("."); fflush(0); m4rie_check(test_addmul(ff, m, l, n) == 0); printf("."); fflush(0); printf(" "); } else { m4rie_check( test_mul(ff, m, l, n) == 0); printf("."); fflush(0); m4rie_check( test_mul(ff, m, n, l) == 0); printf("."); fflush(0); m4rie_check( test_mul(ff, n, m, l) == 0); printf("."); fflush(0); m4rie_check( test_mul(ff, n, l, m) == 0); printf("."); fflush(0); m4rie_check( test_mul(ff, l, m, n) == 0); printf("."); fflush(0); m4rie_check( test_mul(ff, l, n, m) == 0); printf("."); fflush(0); m4rie_check(test_addmul(ff, m, l, n) == 0); printf("."); fflush(0); m4rie_check(test_addmul(ff, m, n, l) == 0); printf("."); fflush(0); m4rie_check(test_addmul(ff, n, m, l) == 0); printf("."); fflush(0); m4rie_check(test_addmul(ff, n, l, m) == 0); printf("."); fflush(0); m4rie_check(test_addmul(ff, l, m, n) == 0); printf("."); fflush(0); m4rie_check(test_addmul(ff, l, n, m) == 0); printf("."); fflush(0); } if (fail_ret == 0) printf(" passed\n"); else printf(" FAILED\n"); return fail_ret; } int main(int argc, char **argv) { srandom(17); int runlong = parse_parameters(argc, argv); gf2e *ff; int fail_ret = 0; for(int k=2; k<=16; k++) { ff = gf2e_init(irreducible_polynomials[k][1]); fail_ret += test_batch(ff, 1, 1, 1); fail_ret += test_batch(ff, 1, 2, 3); fail_ret += test_batch(ff, 11, 12, 13); fail_ret += test_batch(ff, 21, 22, 23); fail_ret += test_batch(ff, 13, 2, 90); fail_ret += test_batch(ff, 32, 33, 34); fail_ret += test_batch(ff, 63, 64, 65); if(k<=12 || runlong) { fail_ret += test_batch(ff, 127, 128, 129); fail_ret += test_batch(ff, 200, 20, 112); } gf2e_free(ff); } return fail_ret; } libm4rie-20130416/tests/test_ple.cc000066400000000000000000000146551212302364300167620ustar00rootroot00000000000000/** * \file test_ple.cc * \brief Test code for PLE elimination routines * * \author Martin Albrecht */ 
/****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010-2012 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include "testing.h" int test_mzd_slice_ple(gf2e *ff, const rci_t m, const rci_t n, const rci_t r) { int fail_ret = 0; mzed_t *a = random_mzed_t_rank(ff, m, n, r); mzd_slice_t *A = mzed_slice(NULL, a); mzed_free(a); mzd_slice_t *LE = mzd_slice_copy(NULL, A); mzd_slice_t *L = mzd_slice_init(ff, m, m); mzd_slice_t *E = mzd_slice_init(ff, m, n); mzp_t *P = mzp_init(m); mzp_t *Q = mzp_init(n); rci_t rbar = mzd_slice_ple(LE, P, Q); m4rie_check( rbar == r); for(rci_t j=0; jnrows; i++) { mzd_slice_write_elem(L,i,j, mzd_slice_read_elem(LE,i,j)); } } for(rci_t i=0; ivalues[i], 1); for(rci_t j=Q->values[i]+1; j< LE->ncols; j++) { mzd_slice_write_elem(E, i, j, mzd_slice_read_elem(LE, i, j)); } } mzd_slice_t *B = mzd_slice_mul(NULL, L, E); mzd_slice_apply_p_left(A, P); m4rie_check( mzd_slice_cmp(A, B) == 0); mzd_slice_free(A); mzd_slice_free(B); mzd_slice_free(LE); mzd_slice_free(L); mzd_slice_free(E); mzp_free(P); mzp_free(Q); return fail_ret; } int test_mzed_ple(gf2e *ff, const rci_t m, const rci_t n, const rci_t r) { int fail_ret = 0; mzed_t *A = random_mzed_t_rank(ff, m, n, r); /** * We check equality first */ mzed_t *LE0 = mzed_copy(NULL, A); mzp_t *P0 = mzp_init(m); mzp_t *Q0 = mzp_init(n); mzed_t *LE1 = mzed_copy(NULL, A); mzp_t *P1 = mzp_init(m); mzp_t *Q1 = mzp_init(n); mzed_t *LE2 = mzed_copy(NULL, A); mzp_t *P2 = 
mzp_init(m); mzp_t *Q2 = mzp_init(n); mzed_set_canary(LE0); rci_t r0 = mzed_ple_naive( LE0, P0, Q0); m4rie_check( mzed_canary_is_alive(LE0) ); m4rie_check( r0 == r); mzed_set_canary(LE1); rci_t r1 = mzed_ple_newton_john(LE1, P1, Q1); m4rie_check( mzed_canary_is_alive(LE1) ); m4rie_check( r1 == r); mzed_set_canary(LE2); rci_t r2 = mzed_ple( LE2, P2, Q2); m4rie_check( mzed_canary_is_alive(LE2) ); m4rie_check( r2 == r); m4rie_check( mzed_cmp(LE0, LE1) == 0 ); m4rie_check( mzed_cmp(LE1, LE2) == 0 ); m4rie_check( mzed_cmp(LE2, LE0) == 0 ); /** * Now we check mathematical properties. Equality has been * established so we only deal with LE0. */ mzed_t *L = mzed_init(ff, m, m); mzed_set_canary(L); for(rci_t j=0; jnrows; i++) mzed_write_elem(L,i,j, mzed_read_elem(LE0,i,j)); m4rie_check( mzed_canary_is_alive(L) ); mzed_t *E = mzed_init(ff, m, n); mzed_set_canary(E); for(rci_t i=0; ivalues[i], 1); for(rci_t j=Q0->values[i]+1; j< LE0->ncols; j++) mzed_write_elem(E, i, j, mzed_read_elem(LE0, i, j)); } m4rie_check( mzed_canary_is_alive(E) ); mzed_t *B = mzed_mul(NULL, L, E); mzed_apply_p_left(A, P0); m4rie_check( mzed_canary_is_alive(A) ); m4rie_check( mzed_cmp(A, B) == 0); mzed_free(A); mzed_free(B); mzed_free(LE0); mzp_free(P0); mzp_free(Q0); mzed_free(LE1); mzp_free(P1); mzp_free(Q1); mzed_free(LE2); mzp_free(P2); mzp_free(Q2); mzed_free(L); mzed_free(E); return fail_ret; } int test_batch(gf2e *ff, const rci_t m, const rci_t n, const rci_t r) { assert(r <= m); assert(r <= n); printf("ple: k: %2d, minpoly: 0x%05x m: %5d, n: %5d, r: %5d ",(int)ff->degree, (unsigned int)ff->minpoly, (int)m, (int)n, (int)r); int fail_ret = 0; if(m == n) { m4rie_check( test_mzed_ple(ff, m, n, r) == 0); printf("."); fflush(0); printf(" "); if(ff->degree <= 4) { m4rie_check( test_mzd_slice_ple(ff, m, n, r) == 0); printf("."); fflush(0); printf(" "); } else { printf(" "); } fflush(0); } else { m4rie_check( test_mzed_ple(ff, m, n, r) == 0); printf("."); fflush(0); m4rie_check( test_mzed_ple(ff, n, m, 
r) == 0); printf("."); fflush(0); if(ff->degree <= 4) { m4rie_check( test_mzd_slice_ple(ff, m, n, r) == 0); printf("."); fflush(0); m4rie_check( test_mzd_slice_ple(ff, n, m, r) == 0); printf("."); fflush(0); } else { printf(" "); } fflush(0); } if (fail_ret == 0) printf(" passed\n"); else printf(" FAILED\n"); return fail_ret; } int main(int argc, char **argv) { srandom(17); int runlong = parse_parameters(argc, argv); gf2e *ff; int fail_ret = 0; for(int k=2; k<=16; k++) { ff = gf2e_init(irreducible_polynomials[k][1]); fail_ret += test_batch(ff, 1, 1, 1); fail_ret += test_batch(ff, 1, 2, 1); fail_ret += test_batch(ff, 2, 2, 2); fail_ret += test_batch(ff, 2, 3, 2); fail_ret += test_batch(ff, 11, 12, 10); fail_ret += test_batch(ff, 21, 22, 21); fail_ret += test_batch(ff, 13, 2, 2); fail_ret += test_batch(ff, 32, 33, 31); fail_ret += test_batch(ff, 63, 64, 62); if(k <= 12 || runlong) { fail_ret += test_batch(ff, 127, 128, 125); fail_ret += test_batch(ff, 127, 128, 12); fail_ret += test_batch(ff, 127, 128, 37); fail_ret += test_batch(ff, 127, 128, 67); fail_ret += test_batch(ff, 200, 20, 19); } fail_ret += test_batch(ff, 1, 1, 0); fail_ret += test_batch(ff, 1, 3, 1); fail_ret += test_batch(ff, 11, 13, 10); fail_ret += test_batch(ff, 21, 23, 20); fail_ret += test_batch(ff, 13, 90, 10); fail_ret += test_batch(ff, 32, 34, 31); fail_ret += test_batch(ff, 63, 65, 62); if(k <= 12 || runlong) { fail_ret += test_batch(ff, 127, 129, 127); fail_ret += test_batch(ff, 200, 112, 111); } gf2e_free(ff); } if (fail_ret == 0) printf("success\n"); return fail_ret; } libm4rie-20130416/tests/test_smallops.cc000066400000000000000000000133051212302364300200230ustar00rootroot00000000000000/** * \file test_smallops.cc * \brief Test code for auxilary routines * * \author Martin Albrecht */ /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2010-2012 Martin Albrecht * * Distributed under the terms of the GNU 
General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include "testing.h" int test_gf2e(gf2e *ff) { int fail_ret = 0; for(word a=1; a < __M4RI_TWOPOW(ff->degree); a++) { word a_inv = ff->inv(ff, a); fail_ret += ((a == ff->inv(ff, a_inv)) ^ 1); } return fail_ret; } int test_slice(gf2e *ff, int m, int n) { int fail_ret = 0; mzed_t *A = random_mzed_t(ff, m, n); mzd_slice_t *a = mzed_slice(NULL, A); mzd_slice_set_canary(a); mzd_slice_t *b = random_mzd_slice_t(ff, m, n); mzed_slice(b, A); m4rie_check(mzd_slice_canary_is_alive(a)); m4rie_check(mzd_slice_canary_is_alive(b)); m4rie_check(mzed_canary_is_alive(A)); m4rie_check(mzd_slice_cmp(a, b) == 0); mzed_t *B = mzed_cling(NULL, b); m4rie_check(mzed_cmp(A, B) == 0); mzed_cling(A, a); m4rie_check(mzed_cmp(A, B) == 0); mzed_free(A); mzed_free(B); mzd_slice_free(a); mzd_slice_free(b); return fail_ret; }; int test_slice_known_answers(gf2e *ff, int m, int n) { int fail_ret = 0; mzed_t *A = mzed_init(ff, m, n); mzed_set_canary(A); mzd_t *one = mzd_init(m,n); mzd_set_ui(one, 1); for(int j=0; jdegree; j++) { mzed_set_ui(A, 1<depth; i++) { if (i!=j) { m4rie_check( mzd_is_zero(a->x[i]) ); } else { m4rie_check( mzd_cmp(a->x[i], one) == 0 ); } } mzed_t *AA = mzed_cling(NULL, a); m4rie_check( mzed_cmp(AA, A) == 0 ); m4rie_check( mzed_canary_is_alive(A) ); mzd_slice_free(a); mzed_free(AA); } mzd_free(one); mzed_free(A); return fail_ret; } int test_add(gf2e *ff, int m, int n) { int fail_ret = 0; mzed_t *A = random_mzed_t(ff, m, n); mzed_t *B = random_mzed_t(ff, m, n); mzed_t *C = random_mzed_t(ff, m, n); mzed_add(C,A,B); mzed_t *D = 
mzed_copy(NULL, C); mzed_set_canary(D); mzed_add(C,C,A); mzed_add(C,C,B); m4rie_check(mzed_is_zero(C) == 1); mzed_add(C,A,B); m4rie_check(mzed_cmp(D,C) == 0); mzd_slice_t *a = mzed_slice(NULL, A); mzd_slice_t *b = mzed_slice(NULL, B); mzd_slice_t *c = mzed_slice(NULL, C); mzd_slice_set_canary(a); mzd_slice_set_canary(b); mzd_slice_set_canary(c); mzd_slice_add(c, a, b); mzd_slice_t *d = mzd_slice_copy(NULL, c); mzd_slice_set_canary(d); mzed_cling(D, d); m4rie_check( mzed_cmp(D, C) == 0 ); m4rie_check( mzed_canary_is_alive(A)); m4rie_check( mzed_canary_is_alive(B)); m4rie_check( mzed_canary_is_alive(C)); m4rie_check( mzed_canary_is_alive(D)); m4rie_check( mzd_slice_canary_is_alive(a)); m4rie_check( mzd_slice_canary_is_alive(b)); m4rie_check( mzd_slice_canary_is_alive(c)); m4rie_check( mzd_slice_canary_is_alive(d)); mzed_free(A); mzed_free(B); mzed_free(C); mzed_free(D); mzd_slice_free(a); mzd_slice_free(b); mzd_slice_free(c); mzd_slice_free(d); return fail_ret; } int test_batch(gf2e *ff, int m, int n) { int fail_ret = 0; printf("testing k: %2d, m: %4d, n: %4d ",ff->degree,m,n); m4rie_check( test_slice(ff, m, n) == 0); printf("."); fflush(0); m4rie_check( test_add(ff, m, n) == 0) ; printf("."); fflush(0); m4rie_check( test_slice_known_answers(ff, m, n) == 0); printf("."); fflush(0); m4rie_check( test_slice(ff, m, m) == 0); printf("."); fflush(0); m4rie_check( test_add(ff, m, m) == 0) ; printf("."); fflush(0); m4rie_check( test_slice_known_answers(ff, m, m) == 0); printf("."); fflush(0); m4rie_check( test_slice(ff, n, m) == 0); printf("."); fflush(0); m4rie_check( test_add(ff, n, m) == 0) ; printf("."); fflush(0); m4rie_check( test_slice_known_answers(ff, n, m) == 0); printf("."); fflush(0); m4rie_check( test_slice(ff, n, n) == 0); printf("."); fflush(0); m4rie_check( test_add(ff, n, n) == 0) ; printf("."); fflush(0); m4rie_check( test_slice_known_answers(ff, n, n) == 0); printf("."); fflush(0); m4rie_check( test_gf2e(ff) == 0); printf("."); fflush(0); if (fail_ret == 
0) printf(" passed\n"); else printf(" FAILED\n"); return fail_ret; } int main(int argc, char **argv) { gf2e *ff; int fail_ret = 0; for(int k=2; k<=16; k++) { ff = gf2e_init(irreducible_polynomials[k][1]); fail_ret += test_batch(ff, 2, m4ri_radix/gf2e_degree_to_w(ff)); fail_ret += test_batch(ff, 2, 2*m4ri_radix/gf2e_degree_to_w(ff)); fail_ret += test_batch(ff, 2, 3*m4ri_radix/gf2e_degree_to_w(ff)); fail_ret += test_batch(ff, 2, 4*m4ri_radix/gf2e_degree_to_w(ff)); fail_ret += test_batch(ff, 4, 3); fail_ret += test_batch(ff, 1, 2); fail_ret += test_batch(ff, 10, 11); fail_ret += test_batch(ff, 20, 19); fail_ret += test_batch(ff, 32, 64); fail_ret += test_batch(ff, 63, 65); fail_ret += test_batch(ff, 64, 65); fail_ret += test_batch(ff, 64, 128); fail_ret += test_batch(ff, 65, 129); fail_ret += test_batch(ff, 201, 200); fail_ret += test_batch(ff, 217, 2); gf2e_free(ff); } return fail_ret; } libm4rie-20130416/tests/test_trsm.cc000066400000000000000000000277421212302364300171700ustar00rootroot00000000000000/** * \file test_trsm.cc * \brief Test code for triangular system solving with matrices (TRSM) routines * * \author Martin Albrecht */ /****************************************************************************** * * M4RIE: Linear Algebra over GF(2^e) * * Copyright (C) 2011 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GEL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ ******************************************************************************/ #include "testing.h" mzed_t *random_mzed_t_upper_left(gf2e *ff, rci_t m) { const int bitmask = (1<degree)-1; mzed_t *U = random_mzed_t(ff, m, m); for(rci_t i=0; idegree)-1; mzed_t *L = random_mzed_t(ff, m, m); for(rci_t i=0; idegree)-1; mzd_slice_t *U = random_mzd_slice_t(ff, m, m); for(rci_t i=0; idegree)-1; mzd_slice_t *L = random_mzd_slice_t(ff, m, m); for(rci_t i=0; idegree)-1; for(rci_t i=0; idegree)-1; int fail_ret = 0; mzed_t *A = random_mzed_t(ff, m, m+n); mzed_t *U = mzed_init_window(A, 0, 0, m, m); mzed_t *B = mzed_init_window(A, 0, m, m, m+n); for(rci_t i=0; idegree)-1; for(rci_t i=0; idegree)-1; for(rci_t i=0; idegree)-1; for(rci_t i=0; idegree, (unsigned int)ff->minpoly, (int)m,(int)n); m4rie_check(test_mzed_trsm_lower_left(ff, m, n) == 0); printf("."); fflush(0); m4rie_check(test_mzed_trsm_upper_left(ff, m, n) == 0); printf("."); fflush(0); m4rie_check(test_mzed_trsm_lower_left(ff, n, m) == 0); printf("."); fflush(0); m4rie_check(test_mzed_trsm_upper_left(ff, n, m) == 0); printf("."); fflush(0); m4rie_check(test_mzed_trsm_upper_left_echelonize(ff, m, n) == 0); printf("."); fflush(0); m4rie_check(test_mzed_trsm_upper_left_echelonize(ff, n, m) == 0); printf("."); fflush(0); m4rie_check(test_mzd_slice_trsm_lower_left(ff, m, n) == 0); printf("."); fflush(0); m4rie_check(test_mzd_slice_trsm_lower_left(ff, n, m) == 0); printf("."); fflush(0); m4rie_check(test_mzd_slice_trsm_upper_left(ff, m, n) == 0); printf("."); fflush(0); m4rie_check(test_mzd_slice_trsm_upper_left(ff, n, m) == 0); printf("."); fflush(0); if (fail_ret == 0) printf(" passed\n"); else printf(" FAILED\n"); return fail_ret; } int main(int argc, char **argv) { srandom(17); int runlong = parse_parameters(argc, argv); gf2e *ff; int fail_ret = 0; for(int k=2; k<=16; k++) { ff = gf2e_init(irreducible_polynomials[k][1]); fail_ret += 
test_batch(ff, 1, 1); fail_ret += test_batch(ff, 1, 2); fail_ret += test_batch(ff, 11, 12); fail_ret += test_batch(ff, 21, 22); fail_ret += test_batch(ff, 13, 2); fail_ret += test_batch(ff, 32, 33); fail_ret += test_batch(ff, 63, 64); fail_ret += test_batch(ff, 65, 1); fail_ret += test_batch(ff, 65, 66); if(k<=12 || runlong) { fail_ret += test_batch(ff, 127, 128); fail_ret += test_batch(ff, 200, 20); } gf2e_free(ff); } if (fail_ret == 0) printf("success\n"); return fail_ret; } libm4rie-20130416/tests/testing.h000066400000000000000000000152051212302364300164520ustar00rootroot00000000000000#include #include #define m4rie_check(expr) \ if (!expr) { \ fail_ret += 1; \ printf("\n%s in %s:%d failed\n", #expr, __FILE__, __LINE__); \ } const word m4rie_canary = (word)"canary!"; static inline word field_mask(const gf2e *ff) { const word mask_fld = ((1<degree)-1); word mask = 0; switch(gf2e_degree_to_w(ff)) { case 2: break; case 4: mask |= mask_fld<< 0 | mask_fld<< 4 | mask_fld<< 8 | mask_fld<<12; mask |= mask_fld<<16 | mask_fld<<20 | mask_fld<<24 | mask_fld<<28; mask |= mask_fld<<32 | mask_fld<<36 | mask_fld<<40 | mask_fld<<44; mask |= mask_fld<<48 | mask_fld<<52 | mask_fld<<56 | mask_fld<<60; break; case 8: mask |= mask_fld<< 0 | mask_fld<< 8 | mask_fld<<16 | mask_fld<<24; mask |= mask_fld<<32 | mask_fld<<40 | mask_fld<<48 | mask_fld<<56; break; case 16: mask |= mask_fld<< 0 | mask_fld<<16 | mask_fld<<32 | mask_fld<<48; break; } return mask; } static inline void mzed_set_canary(mzed_t *A) { const word mask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - A->x->offset); const word mask_end = __M4RI_LEFT_BITMASK((A->x->offset + A->x->ncols)%m4ri_radix); const word mask_field = field_mask(A->finite_field); const rci_t n = A->x->width-1; for(rci_t i=0; inrows; i++) { A->x->rows[i][0] = (A->x->rows[i][0] & mask_begin) | (m4rie_canary & mask_field & ~mask_begin); A->x->rows[i][n] = (A->x->rows[i][n] & mask_end) | (m4rie_canary & mask_field & ~mask_end); } } static inline void 
mzed_clear_canary(mzed_t *A) { const word mask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - A->x->offset); const word mask_end = __M4RI_LEFT_BITMASK((A->x->offset + A->x->ncols)%m4ri_radix); const rci_t n = A->x->width-1; for(rci_t i=0; inrows; i++) { A->x->rows[i][0] &= mask_begin; A->x->rows[i][n] &= mask_end; } } static inline int mzed_canary_is_alive(mzed_t *A) { const word mask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - A->x->offset); const word mask_end = __M4RI_LEFT_BITMASK((A->x->offset + A->x->ncols)%m4ri_radix); const word mask_field = field_mask(A->finite_field); const rci_t n = A->x->width-1; if(n == 0) { for(rci_t i=0; inrows; i++) { if ((A->x->rows[i][0] & ~mask_begin & ~mask_begin) != (m4rie_canary & mask_field & ~mask_begin & ~mask_begin)) { return 0; } } } else { for(rci_t i=0; inrows; i++) { if ((A->x->rows[i][0] & ~mask_begin) != (m4rie_canary & mask_field & ~mask_begin)) { return 0; } if ((A->x->rows[i][n] & ~mask_end) != (m4rie_canary & mask_field & ~mask_end)) { return 0; } } } return 1; } static inline int mzed_interval_clean(mzed_t *A) { assert(A->x->offset == 0); const word mask_end = __M4RI_LEFT_BITMASK((A->x->offset + A->x->ncols)%m4ri_radix); const word mask_field = field_mask(A->finite_field); for(rci_t i=0; inrows; i++) { for(wi_t j=0; jx->width-1; j++) { if (A->x->rows[i][j] & mask_field) return 0; } if (A->x->rows[i][A->x->width-1] & mask_field & mask_end) return 0; } return 1; } static inline void mzd_slice_set_canary(mzd_slice_t *A) { const word mask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - A->x[0]->offset); const word mask_end = __M4RI_LEFT_BITMASK((A->x[0]->offset + A->ncols)%m4ri_radix); const rci_t n = A->x[0]->width-1; if(n != 0) { for(unsigned int e=0; efinite_field->degree; e++) { for(rci_t i=0; inrows; i++) { A->x[e]->rows[i][0] = (A->x[e]->rows[i][0] & mask_begin) | (m4rie_canary & ~mask_begin); A->x[e]->rows[i][n] = (A->x[e]->rows[i][n] & mask_end) | (m4rie_canary & ~mask_end); } } } else { for(unsigned int e=0; 
efinite_field->degree; e++) { for(rci_t i=0; inrows; i++) { A->x[e]->rows[i][0] = (A->x[e]->rows[i][0] & mask_begin & mask_end) | (m4rie_canary & ~(mask_begin & mask_end)); } } } } static inline void mzd_slice_clear_canary(mzd_slice_t *A) { const word mask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - A->x[0]->offset); const word mask_end = __M4RI_LEFT_BITMASK((A->x[0]->offset + A->ncols)%m4ri_radix); const rci_t n = A->x[0]->width-1; for(int e=0; efinite_field->degree; e++) { for(rci_t i=0; inrows; i++) { A->x[e]->rows[i][0] &=mask_begin; A->x[e]->rows[i][n] &=mask_end; } } } static inline int mzd_slice_canary_is_alive(mzd_slice_t *A) { const word mask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - A->x[0]->offset); const word mask_end = __M4RI_LEFT_BITMASK((A->x[0]->offset + A->ncols)%m4ri_radix); const rci_t n = A->x[0]->width-1; for(unsigned int e=0; efinite_field->degree; e++) { for(rci_t i=0; inrows; i++) { if ((A->x[e]->rows[i][0] & ~mask_begin) != (m4rie_canary & ~mask_begin)) { return 0; } if ((A->x[e]->rows[i][n] & ~mask_end) != (m4rie_canary & ~mask_end)) { return 0; } } return 1; } } static inline mzed_t *random_mzed_t(gf2e *ff, int m, int n) { mzed_t *A = mzed_init(ff,m,n); mzed_randomize(A); mzed_set_canary(A); return A; } static inline mzd_slice_t *random_mzd_slice_t(gf2e *ff, int m, int n) { mzd_slice_t *A = mzd_slice_init(ff,m,n); mzd_slice_randomize(A); mzd_slice_set_canary(A); return A; } static inline mzed_t *random_mzed_t_rank(gf2e *ff, const rci_t m, const rci_t n, const rci_t r) { mzed_t *U = mzed_init(ff, m, n); mzed_t *Ur = mzed_init_window(U, 0, 0, r, U->ncols); mzed_t *L = mzed_init(ff, m, m); mzed_randomize(L); mzed_randomize(Ur); for(rci_t i=0; incols; j++) { mzed_write_elem(L, i, j, 0); } mzed_write_elem(L, i, i, 1); } for(rci_t i=r; inrows; i++) { for(rci_t j=r+1; j < L->ncols; j++) { mzed_write_elem(L, i, j, 0); } } for(rci_t i=0; inrows; i++) { const rci_t ii = random() % A->nrows; mzed_row_swap(A, i, ii); }; for(rci_t i=0; incols; i++) { 
const rci_t ii = random() % A->ncols; mzed_col_swap(A, i, ii); }; mzed_set_canary(A); return A; } static inline int parse_parameters(int argc, char **argv) { int runlong = 0; int c; while ((c = getopt(argc, argv, "l")) != -1) { switch (c) { case 'l': runlong = 1; break; case '?': printf(" -l run long tests.\n"); abort(); default: abort(); } } return runlong; }