pax_global_header00006660000000000000000000000064131337050210014505gustar00rootroot0000000000000052 comment=aa644b22fff9e939e549a9759629be58b3c5cac2 qperf-0.4.10/000077500000000000000000000000001313370502100127045ustar00rootroot00000000000000qperf-0.4.10/AUTHORS000066400000000000000000000002231313370502100137510ustar00rootroot00000000000000Written by Johann George Thanks to the following for their contributions: Dotan Barak Ralph Campbell Yevgeny Kliteynik Dave Olson qperf-0.4.10/COPYING000066400000000000000000000431031313370502100137400ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. 
These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. 
(Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. 
c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. 
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. qperf-0.4.10/Makefile.am000066400000000000000000000001331313370502100147350ustar00rootroot00000000000000SUBDIRS = src EXTRA_DIST = qperf.spec.in dist-hook: qperf.spec cp qperf.spec $(distdir) qperf-0.4.10/README000066400000000000000000000010641313370502100135650ustar00rootroot00000000000000To build ./cleanup ./autogen.sh ./configure make Changing version * src/qperf.c: Change VER_MAJ, VER_MIN and VER_INC. * configure.in: Change in AC_INIT and AM_INIT_AUTOMAKE * qperf.spec: Change line beginning with Version: * Note ensure that qperf.spec is modified last so that cleanup does not delete it. 
Notes * If the library ibverbs is not found, a version of qperf is built that does not support the RDMA tests. * Running "make clean" does not seem to clean up everything. Run "./cleanup" instead. qperf-0.4.10/autogen.sh000077500000000000000000000002371313370502100147070ustar00rootroot00000000000000#!/bin/sh set -x for f in NEWS README ChangeLog; do [ -e "$f" ] || touch "$f" done aclocal && automake --foreign --add-missing --copy && autoconf qperf-0.4.10/cleanup000077500000000000000000000015431313370502100142640ustar00rootroot00000000000000#!/bin/sh # autotools makes such a mess. Clean up after it. # for f in AUTHORS NEWS README ChangeLog; do [ -s "$f" ] || rm -f "$f" done rm -r -f autom4te.cache .deps src/.deps rm -f `cat <qperf.1 help.c: help.txt if RDMA ./mkhelp RDMA else ./mkhelp endif clean-local: -rm -f qperf.1 qperf-0.4.10/src/help.txt000066400000000000000000001234021313370502100151660ustar00rootroot00000000000000Main Synopsis qperf qperf SERVERNODE [OPTIONS] TESTS Description qperf measures bandwidth and latency between two nodes. It can work over TCP/IP as well as the RDMA transports. On one of the nodes, qperf is typically run with no arguments designating it the server node. One may then run qperf on a client node to obtain measurements such as bandwidth, latency and cpu utilization. In its most basic form, qperf is run on one node in server mode by invoking it with no arguments. On the other node, it is run with two arguments: the name of the server node followed by the name of the test. A list of tests can be found in the section, TESTS. A variety of options may also be specified. One can get more detailed information on qperf by using the --help option. 
Below are examples of using the --help option: qperf --help examples Some examples of using qperf qperf --help opts Summary of options qperf --help options Description of options qperf --help tests Short summary and description of tests qperf --help TESTNAME More information on test TESTNAME Author Written by Johann George. Bugs None of the RDMA tests are available if qperf is compiled without the RDMA libraries. None of the XRC tests are available if qperf is compiled without the XRC extensions. The -f option is not yet implemented in many of the tests. Categories -RDMA To get help on a particular category, you may type: qperf --help CATEGORY where CATEGORY might be one of the following: categories This current list being displayed examples Some examples options A long list of options opts A short description of the options tests A list and description of the various tests or one of the following tests: conf quit rds_bw rds_lat sctp_bw sctp_lat sdp_bw sdp_lat tcp_bw tcp_lat udp_bw udp_lat Categories +RDMA To get help on a particular category, you may type: qperf --help CATEGORY where CATEGORY might be one of the following: categories This current list being displayed examples Some examples options A long list of options opts A short description of the options tests A list of tests CATEGORY may also be one of the following tests conf quit rc_bi_bw rc_bw rc_compare_swap_mr rc_fetch_add_mr rc_lat rc_rdma_read_bw rc_rdma_read_lat rc_rdma_write_bw rc_rdma_write_lat rc_rdma_write_poll_lat rds_bw rds_lat sctp_bw sctp_lat sdp_bw sdp_lat tcp_bw tcp_lat uc_bi_bw uc_bw uc_lat uc_rdma_write_bw uc_rdma_write_lat uc_rdma_write_poll_lat ud_bi_bw ud_bw ud_lat udp_bw udp_lat ver_rc_compare_swap ver_rc_fetch_add xrc_bi_bw xrc_bw xrc_lat Examples In these examples, we first run qperf on a node called myserver in server mode by invoking it with no arguments. 
In all the subsequent examples, we run qperf on another node and connect to the server which we assume has a hostname of myserver. * To run a TCP bandwidth and latency test: qperf myserver tcp_bw tcp_lat * To run a SDP bandwidth test for 10 seconds: qperf myserver -t 10 sdp_bw * To run a UDP latency test and then cause the server to terminate: qperf myserver udp_lat quit * To measure the RDMA UD latency and bandwidth: qperf myserver ud_lat ud_bw * To measure RDMA RC bi-directional bandwidth: qperf myserver rc_bi_bw * To get a range of TCP latencies with a message size from 1 to 64K qperf myserver -oo msg_size:1:64K:*2 -vu tcp_lat Opts --access_recv OnOff (-ar) Turn on/off accessing received data -ar1 Cause received data to be accessed --alt_port Port (-ap) Set alternate path port --loc_alt_port Port (-lap) Set local alternate path port --rem_alt_port Port (-rap) Set remote alternate path port --cpu_affinity PN (-ca) Set processor affinity --loc_cpu_affinity PN (-lca) Set local processor affinity --rem_cpu_affinity PN (-rca) Set remote processor affinity --flip OnOff (-f) Flip on/off sender and receiver -f1 Flip (on) sender and receiver --help Topic (-h) Get more information on a topic --host Node (-H) Identify server node --id Device:Port (-i) Set RDMA device and port --loc_id Device:Port (-li) Set local RDMA device and port --rem_id Device:Port (-ri) Set remote RDMA device and port --listen_port Port (-lp) Set server listen port --loop Var:Init:Last:Incr (-oo) Sequence through values --msg_size Size (-m) Set message size --mtu_size Size (-mt) Set MTU size (RDMA only) --no_msgs Count (-n) Send Count messages --cq_poll OnOff (-cp) Set polling mode on/off --loc_cq_poll OnOff (-lcp) Set local polling mode on/off --rem_cq_poll OnOff (-rcp) Set remote polling mode on/off -cp1 Turn polling mode on -lcp1 Turn local polling mode on -rcp1 Turn remote polling mode on --ip_port Port (-ip) Set TCP port used for tests --precision Digits (-e) Set precision reported --rd_atomic Max 
(-nr) Set RDMA read/atomic count --loc_rd_atomic Max (-lnr) Set local RDMA read/atomic count --rem_rd_atomic Max (-rnr) Set remote RDMA read/atomic count --service_level SL (-sl) Set service level --loc_service_level SL (-lsl) Set local service level --rem_service_level SL (-rsl) Set remote service level --sock_buf_size Size (-sb) Set socket buffer size --loc_sock_buf_size Size (-lsb) Set local socket buffer size --rem_sock_buf_size Size (-rsb) Set remote socket buffer size --src_path_bits num (-sp) Set source path bits --loc_src_path_bits num (-lsp) Set local source path bits --rem_src_path_bits num (-rsp) Set remote source path bits --static_rate (-sr) Set IB static rate --loc_static_rate (-lsr) Set local IB static rate --rem_static_rate (-rsr) Set remote IB static rate --time Time (-t) Set test duration --timeout Time (-to) Set timeout --loc_timeout Time (-lto) Set local timeout --rem_timeout Time (-rto) Set remote timeout --unify_nodes (-un) Unify nodes --unify_units (-uu) Unify units --use_bits_per_sec (-ub) Use bits/sec rather than bytes/sec --use_cm OnOff (-cm) Use RDMA Connection Manager or not -cm1 Use RDMA Connection Manager --verbose (-v) Verbose; turn on all of -v[cstu] --verbose_conf (-vc) Show configuration information --verbose_stat (-vs) Show statistical information --verbose_time (-vt) Show timing information --verbose_used (-vu) Show information on parameters --verbose_more (-vv) More verbose; turn on all of -v[CSTU] --verbose_more_conf (-vvc) Show more configuration information --verbose_more_stat (-vvs) Show more statistical information --verbose_more_time (-vvt) Show more timing information --verbose_more_used (-vvu) Show more information on parameters --version (-V) Print out version --wait_server Time (-ws) Set time to wait for server Options --access_recv OnOff (-ar) If OnOff is non-zero, data is accessed once received. Otherwise, data is ignored. By default, OnOff is 0. This can help to mimic some applications. 
-ar1 Cause received data to be accessed. --alt_port Port (-ap) Set alternate path port. This enables automatic path failover. --loc_alt_port Port (-lap) Set local alternate path port. This enables automatic path failover. --rem_alt_port Port (-rap) Set remote alternate path port. This enables automatic path failover. --cpu_affinity PN (-ca) Set cpu affinity to PN. CPUs are numbered sequentially from 0. If PN is "any", any cpu is allowed otherwise the cpu is limited to the one specified. --loc_cpu_affinity PN (-lca) Set local processor affinity to PN. --rem_cpu_affinity PN (-rca) Set remote processor affinity to PN. --flip OnOff (-f) If non-zero, cause sender and receiver to play opposite roles. -f1 Cause sender and receiver to play opposite roles. --help Topic (-h) Print out information about Topic. To see the list of topics, type qperf --help --host Host (-H) Run test between the current node and the qperf running on node Host. This can also be specified as the first non-option argument. --id Device:Port (-i) Use RDMA Device and Port. --loc_id Device:Port (-li) Use local RDMA Device and Port. --rem_id Device:Port (-ri) Use remote RDMA Device and Port. --listen_port Port (-lp) Set the port we listen on to Port. This must be set to the same port on both the server and client machines. The default value is 19765. --loop Var:Init:Last:Incr (-oo) Run a test multiple times sequencing through a series of values. Var is the loop variable; Init is the initial value; Last is the value it must not exceed and Incr is the increment. It is useful to set the --verbose_used (-vu) option in conjunction with this option. --msg_size Size (-m) Set the message size to Size. The default value varies by test. 
It is assumed that the value is specified in bytes however, a trailing kib or K, mib or M, or gib or G indicates that the size is being specified in kibibytes, mebibytes or gibibytes respectively while a trailing kb or k, mb or m, or gb or g indicates kilobytes, megabytes or gigabytes respectively. --mtu_size Size (-mt) Set the MTU size. Only relevant to the RDMA UC/RC tests. Units are specified in the same manner as the --msg_size option. --no_msgs N (-n) Set test duration by number of messages sent instead of time. --cq_poll OnOff (-cp) Turn polling mode on or off. This is only relevant to the RDMA tests and determines whether they poll or wait on the completion queues. If OnOff is 0, they wait; otherwise they poll. --loc_cq_poll OnOff (-lcp) Locally turn polling mode on or off. --rem_cq_poll OnOff (-rcp) Remotely turn polling mode on or off. -cp1 Turn polling mode on. -lcp1 Turn local polling mode on. -rcp1 Turn remote polling mode on. --ip_port Port (-ip) Use Port to run the socket tests. This is different from --listen_port which is used for synchronization. This is only relevant for the socket tests and refers to the TCP/UDP/SDP/RDS/SCTP port that the test is run on. --precision Digits (-e) Set the number of significant digits that are used to report results. --rd_atomic Max (-nr) Set the number of in-flight operations that can be handled for a RDMA read or atomic operation to Max. This is only relevant to the RDMA Read and Atomic tests. --loc_rd_atomic Max (-lnr) Set local read/atomic count. --rem_rd_atomic Max (-rnr) Set remote read/atomic count. --service_level SL (-sl) Set RDMA service level to SL. This is only used by the RDMA tests. The service level must be between 0 and 15. The default service level is 0. --loc_service_level SL (-lsl) Set local service level. --rem_service_level SL (-rsl) Set remote service level. --sock_buf_size Size (-sb) Set the socket buffer size. This is only relevant to the socket tests. 
--loc_sock_buf_size Size (-lsb) Set local socket buffer size. --rem_sock_buf_size Size (-rsb) Set remote socket buffer size. --src_path_bits N (-sp) Set source path bits. If the LMC is not zero, this will cause the connection to use a LID with the low order LMC bits set to N. --loc_src_path_bits N (-lsp) Set local source path bits. --rem_src_path_bits N (-rsp) Set remote source path bits. --static_rate Rate (-sr) Force InfiniBand static rate. Rate can be one of: 2.5, 5, 10, 20, 30, 40, 60, 80, 120, 1xSDR (2.5 Gbps), 1xDDR (5 Gbps), 1xQDR (10 Gbps), 4xSDR (2.5 Gbps), 4xDDR (5 Gbps), 4xQDR (10 Gbps), 8xSDR (2.5 Gbps), 8xDDR (5 Gbps), 8xQDR (10 Gbps). --loc_static_rate (-lsr) Force local InfiniBand static rate --rem_static_rate (-rsr) Force remote InfiniBand static rate --time Time (-t) Set test duration to Time. Specified in seconds however a trailing m, h or d indicates that the time is specified in minutes, hours or days respectively. --timeout Time (-to) Set timeout to Time. This is the timeout used for various things such as exchanging messages. The default is 5 seconds. --loc_timeout Time (-lto) Set local timeout to Time. This may be used on the server to set the timeout when initially exchanging data with each client. However, as soon as we receive the client's parameters, the client's remote timeout will override this parameter. --rem_timeout Time (-rto) Set remote timeout to Time. --unify_nodes (-un) Unify the nodes. Describe them in terms of local and remote rather than send and receive. --unify_units (-uu) Unify the units that results are shown in. Uses the lowest common denominator. Helpful for scripts. --use_bits_per_sec (-ub) Use bits/sec rather than bytes/sec when displaying networking speed. --use_cm OnOff (-cm) Use the RDMA Connection Manager (CM) if OnOff is non-zero. It is necessary to use the CM for iWARP devices. The default is to establish the connection without using the CM. This only works for the tests that use the RC transport. 
-cm1 Use RDMA Connection Manager. --verbose (-v) Provide more detailed output. Turns on -vc, -vs, -vt and -vu. --verbose_conf (-vc) Provide information on configuration. --verbose_stat (-vs) Provide information on statistics. --verbose_time (-vt) Provide information on timing. --verbose_used (-vu) Provide information on parameters used. --verbose_more (-vv) Provide even more detailed output. Turns on -vvc, -vvs, -vvt and -vvu. --verbose_more_conf (-vvc) Provide more information on configuration. --verbose_more_stat (-vvs) Provide more information on statistics. --verbose_more_time (-vvt) Provide more information on timing. --verbose_more_used (-vvu) Provide more information on parameters used. --version (-V) The current version of qperf is printed. --wait_server Time (-ws) If the server is not ready, continue to try connecting for Time seconds before giving up. The default is 5 seconds. Tests -RDMA Miscellaneous conf Show configuration quit Cause the server to quit Socket Based rds_bw RDS streaming one way bandwidth rds_lat RDS one way latency sctp_bw SCTP streaming one way bandwidth sctp_lat SCTP one way latency sdp_bw SDP streaming one way bandwidth sdp_lat SDP one way latency tcp_bw TCP streaming one way bandwidth tcp_lat TCP one way latency udp_bw UDP streaming one way bandwidth udp_lat UDP one way latency Tests +RDMA Miscellaneous conf Show configuration quit Cause the server to quit Socket Based rds_bw RDS streaming one way bandwidth rds_lat RDS one way latency sctp_bw SCTP streaming one way bandwidth sctp_lat SCTP one way latency sdp_bw SDP streaming one way bandwidth sdp_lat SDP one way latency tcp_bw TCP streaming one way bandwidth tcp_lat TCP one way latency udp_bw UDP streaming one way bandwidth udp_lat UDP one way latency RDMA Send/Receive rc_bi_bw RC streaming two way bandwidth rc_bw RC streaming one way bandwidth rc_lat RC one way latency uc_bi_bw UC streaming two way bandwidth uc_bw UC streaming one way bandwidth uc_lat UC one way latency ud_bi_bw UD 
streaming two way bandwidth ud_bw UD streaming one way bandwidth ud_lat UD one way latency xrc_bi_bw XRC streaming two way bandwidth xrc_bw XRC streaming one way bandwidth xrc_lat XRC one way latency RDMA rc_rdma_read_bw RC RDMA read streaming one way bandwidth rc_rdma_read_lat RC RDMA read one way latency rc_rdma_write_bw RC RDMA write streaming one way bandwidth rc_rdma_write_lat RC RDMA write one way latency rc_rdma_write_poll_lat RC RDMA write one way polling latency uc_rdma_write_bw UC RDMA write streaming one way bandwidth uc_rdma_write_lat UC RDMA write one way latency uc_rdma_write_poll_lat UC RDMA write one way polling latency InfiniBand Atomics rc_compare_swap_mr RC compare and swap messaging rate rc_fetch_add_mr RC fetch and add messaging rate Verification ver_rc_compare_swap Verify RC compare and swap ver_rc_fetch_add Verify RC fetch and add conf Purpose Show configuration Common Options None Description Shows the node name, CPUs and OS of both nodes being used. quit Purpose Quit Common Options None Description Causes the server to quit. rds_bw Purpose RDS streaming one way bandwidth Common Options --access_recv OnOff (-ar) Access received data --cpu_affinity PN (-ca) Set processor affinity --msg_size Size (-m) Set message size --sock_buf_size Size (-sb) Set socket buffer size --time (-t) Set test duration Other Options --listen_port, --ip_port, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description The client repeatedly sends messages to the server while the server notes how many were received. 
rds_lat Purpose RDS one way latency Common Options --cpu_affinity PN (-ca) Set processor affinity --msg_size Size (-m) Set message size --sock_buf_size Size (-sb) Set socket buffer size --time (-t) Set test duration Other Options --listen_port, --ip_port, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description A ping pong latency test where the server and client exchange messages repeatedly using RDS sockets. sctp_bw Purpose SCTP streaming one way bandwidth Common Options --access_recv OnOff (-ar) Access received data --cpu_affinity PN (-ca) Set processor affinity --msg_size Size (-m) Set message size --sock_buf_size Size (-sb) Set socket buffer size --time (-t) Set test duration Other Options --listen_port, --ip_port, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description The client repeatedly sends messages to the server while the server notes how many were received. sctp_lat Purpose SCTP one way latency Common Options --cpu_affinity PN (-ca) Set processor affinity --msg_size Size (-m) Set message size --sock_buf_size Size (-sb) Set socket buffer size --time (-t) Set test duration Other Options --listen_port, --ip_port, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description A ping pong latency test where the server and client exchange messages repeatedly using SCTP sockets. sdp_bw Purpose SDP streaming one way bandwidth Common Options --access_recv OnOff (-ar) Access received data --cpu_affinity PN (-ca) Set processor affinity --msg_size Size (-m) Set message size --sock_buf_size Size (-sb) Set socket buffer size --time (-t) Set test duration Other Options --listen_port, --ip_port, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description The client repeatedly sends messages to the server while the server notes how many were received. 
sdp_lat Purpose SDP one way latency Common Options --cpu_affinity PN (-ca) Set processor affinity --msg_size Size (-m) Set message size --sock_buf_size Size (-sb) Set socket buffer size --time (-t) Set test duration Other Options --listen_port, --ip_port, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description A ping pong latency test where the server and client exchange messages repeatedly using SDP sockets. tcp_bw Purpose TCP streaming one way bandwidth Common Options --access_recv OnOff (-ar) Access received data --cpu_affinity PN (-ca) Set processor affinity --msg_size Size (-m) Set message size --sock_buf_size Size (-sb) Set socket buffer size --time (-t) Set test duration Other Options --listen_port, --ip_port, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description The client repeatedly sends messages to the server while the server notes how many were received. tcp_lat Purpose TCP one way latency Common Options --cpu_affinity PN (-ca) Set processor affinity --msg_size Size (-m) Set message size --sock_buf_size Size (-sb) Set socket buffer size --time (-t) Set test duration Other Options --listen_port, --ip_port, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description A ping pong latency test where the server and client exchange messages repeatedly using TCP sockets. udp_bw Purpose UDP streaming one way bandwidth Common Options --access_recv OnOff (-ar) Access received data --cpu_affinity PN (-ca) Set processor affinity --msg_size Size (-m) Set message size --sock_buf_size Size (-sb) Set socket buffer size --time (-t) Set test duration Other Options --listen_port, --ip_port, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description The client repeatedly sends messages to the server while the server notes how many were received. 
udp_lat Purpose UDP one way latency Common Options --cpu_affinity PN (-ca) Set processor affinity --msg_size Size (-m) Set message size --sock_buf_size Size (-sb) Set socket buffer size --time (-t) Set test duration Other Options --listen_port, --ip_port, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description A ping pong latency test where the server and client exchange messages repeatedly using UDP sockets. ud_bw +RDMA Purpose UD streaming one way bandwidth Common Options --access_recv OnOff (-ar) Access received data --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description The client sends messages to the server who notes how many it received. The UD Send/Receive mechanism is used. ud_bi_bw +RDMA Purpose UD streaming two way bandwidth Common Options --access_recv OnOff (-ar) Access received data --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description Both the client and server exchange messages with each other using the UD Send/Receive mechanism and note how many were received. 
ud_lat +RDMA Purpose UD one way latency Common Options --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description A ping pong latency test where the server and client exchange messages repeatedly using UD Send/Receive. rc_bw +RDMA Purpose RC streaming one way bandwidth Common Options --access_recv OnOff (-ar) Access received data --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description The client sends messages to the server who notes how many it received. The RC Send/Receive mechanism is used. rc_bi_bw +RDMA Purpose RC streaming two way bandwidth Common Options --access_recv OnOff (-ar) Access received data --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description Both the client and server exchange messages with each other using the RC Send/Receive mechanism and note how many were received. 
rc_lat +RDMA Purpose RC one way latency Common Options --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description A ping pong latency test where the server and client exchange messages repeatedly using RC Send/Receive. uc_bw +RDMA Purpose UC streaming one way bandwidth Common Options --access_recv OnOff (-ar) Access received data --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description The client sends messages to the server who notes how many it received. The UC Send/Receive mechanism is used. uc_bi_bw +RDMA Purpose UC streaming two way bandwidth Common Options --access_recv OnOff (-ar) Access received data --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description Both the client and server exchange messages with each other using the UC Send/Receive mechanism and note how many were received. 
uc_lat +RDMA Purpose UC one way latency Common Options --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description A ping pong latency test where the server and client exchange messages repeatedly using UC Send/Receive. rc_rdma_read_bw +RDMA Purpose RC RDMA read streaming one way bandwidth Common Options --access_recv OnOff (-ar) Access received data --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --rd_atomic, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description The client repeatedly performs RC RDMA Read operations and notes how many of them complete. rc_rdma_read_lat +RDMA Purpose RC RDMA read one way latency Common Options --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description The client repeatedly performs RC RDMA Read operations waiting for completion before starting the next one. 
rc_rdma_write_bw +RDMA Purpose RC RDMA write streaming one way bandwidth Common Options --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description The client repeatedly performs RC RDMA Write operations and notes how many of them complete. rc_rdma_write_lat +RDMA Purpose RC RDMA write one way latency Common Options --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description A ping pong latency test where the server and client exchange messages using RC RDMA write operations. rc_rdma_write_poll_lat +RDMA Purpose RC RDMA write one way polling latency Common Options --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description A ping pong latency test using RC RDMA Write operations. First the client performs an RDMA Write while the server stays in a tight loop waiting for the memory buffer to change. The first and last bytes of the memory buffer are tested to ensure that the entire message was received. This is then repeated with both sides playing opposite roles. Since this does not use completion queues, the --cq_poll flag has no effect. 
uc_rdma_write_bw +RDMA Purpose UC RDMA write streaming one way bandwidth Common Options --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description The client repeatedly performs UC RDMA Write operations and notes how many of them complete. uc_rdma_write_lat +RDMA Purpose UC RDMA write one way latency Common Options --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description A ping pong latency test where the server and client exchange messages using UC RDMA write operations. uc_rdma_write_poll_lat +RDMA Purpose UC RDMA write one way polling latency Common Options --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description A ping pong latency test using UC RDMA Write operations. First the client performs an RDMA Write while the server stays in a tight loop waiting for the memory buffer to change. The first and last bytes of the memory buffer are tested to ensure that the entire message was received. This is then repeated with both sides playing opposite roles. Since this does not use completion queues, the --cq_poll flag has no effect. 
rc_compare_swap_mr +RDMA Purpose RC compare and swap messaging rate Common Options --id Device:Port (-i) Set RDMA device and port --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --rd_atomic, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description The client repeatedly performs the RC Atomic Compare and Swap operation and determines how many of them complete. rc_fetch_add_mr +RDMA Purpose RC fetch and add messaging rate Common Options --id Device:Port (-i) Set RDMA device and port --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --rd_atomic, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description The client repeatedly performs the RC Atomic Fetch and Add operation and determines how many of them complete. ver_rc_compare_swap +RDMA Purpose Verify RC compare and swap Common Options --id Device:Port (-i) Set RDMA device and port --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --msg_size, --mtu_size, --rd_atomic, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description Test the RC Compare and Swap Atomic operation. The server's memory location starts with zero and the client successively makes exchanges with a variety of different values. The results are checked for correctness. 
ver_rc_fetch_add +RDMA Purpose Verify RC fetch and add Common Options --id Device:Port (-i) Set RDMA device and port --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --msg_size, --mtu_size, --rd_atomic, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description Tests the RC Fetch and Add Atomic operation. The server's memory location starts with zero and the client successively adds one. The results are checked for correctness. xrc_bw +RDMA Purpose XRC streaming one way bandwidth Common Options --access_recv OnOff (-ar) Access received data --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description The client sends messages to the server who notes how many it received. The XRC Send/Receive mechanism is used. xrc_bi_bw +RDMA Purpose XRC streaming two way bandwidth Common Options --access_recv OnOff (-ar) Access received data --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --use_bits_per_sec, --verbose Description Both the client and server exchange messages with each other using the XRC Send/Receive mechanism and note how many were received. 
xrc_lat +RDMA Purpose XRC one way latency Common Options --id Device:Port (-i) Set RDMA device and port --msg_size Size (-m) Set message size --cq_poll OnOff Set polling mode on/off --time (-t) Set test duration Other Options --cpu_affinity, --listen_port, --mtu_size, --static_rate, --timeout Display Options --precision, --unify_nodes, --unify_units, --verbose Description A ping pong latency test where the server and client exchange messages repeatedly using XRC Send/Receive. qperf-0.4.10/src/mkhelp000077500000000000000000000062731313370502100147110ustar00rootroot00000000000000#!/usr/bin/env perl # use strict; use warnings; use diagnostics; my $help_txt = "help.txt"; my $help_c = "help.c"; my $top = " /* * This was generated from $help_txt. Do not modify directly. * * Copyright (c) 2002-2009 Johann George. All rights reserved. * Copyright (c) 2006-2009 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ char *Usage[] ={ "; my $end = " 0, }; "; sub panic { print STDERR @_, "\n"; exit 1; } sub main() { my %defs; $defs{$_} = 1 for (@ARGV); my $iFile; open($iFile, "<", $help_txt) or panic("cannot find $help_txt"); my $str = ""; my $keep = 1; while (<$iFile>) { chomp; s/\s+$//; if (/^ / or /^$/) { if ($keep) { s///; s/(["\\])/\\$1/g; s/$/\\n/; if (/^(.{68}(?>[^\\]?))(..*)/) { $str .= " "x8 . "\"$1\"\n"; $str .= " "x12 . "\"$2\"\n"; } else { $str .= " "x8 . "\"$_\"\n"; } } } else { my @args = split; my $arg0 = lc(shift @args); $keep = 1; for (@args) { if (/^\+(.*)/) { $keep = 0 unless ($defs{$1}); } elsif (/^-(.*)/) { $keep = 0 if ($defs{$1}); } } if ($keep) { if ($str) { chop $str; $str .= ",\n"; } $str .= " "x4 . "\"$arg0\",\n"; } } } close $iFile; if ($str) { chop $str; $str .= ",\n"; } $top =~ s/^\n//; $end =~ s/^\n//; my $oFile; open($oFile, ">", $help_c) or panic("cannot create $help_c"); print $oFile $top, $str, $end; close $oFile; } main(); qperf-0.4.10/src/mkman000077500000000000000000000076541313370502100145400ustar00rootroot00000000000000#!/usr/bin/env perl # Make qperf man page. # use strict; use warnings; use diagnostics; use POSIX; my $help_txt = "help.txt"; # Print out an error message and exit. # sub panic { print STDERR @_, "\n"; exit 1; } # Print arguments to stdout. # sub printx { print STDOUT "@_"; } # Print arguments along with a newline to stdout. # sub printn { print STDOUT "@_\n"; } # Print a sub-heading as required in upper-case. # sub printh ($) { my $name = shift; printn ".SH ", uc $name; } # Undent some text by four columns. # sub undent { my $str = shift; $str =~ s/^ //gm; return $str; } # Generate the list of tests from the help.txt file. 
# sub do_tests ($$) { my $dict = shift; my $name = shift; my $text = $dict->{"Tests +RDMA"}; printh $name; $text =~ s/^\s*//; my @lines = split(/\n/, $text); for (@lines) { next unless /^ {8}(\w+)\s+(.*)/; printn '.TP'; printn "\\fB$1\\fP"; printn $2; } } # Generate the list of options from the help.txt file. # sub do_options ($$) { my $dict = shift; my $name = shift; my $text = $dict->{$name}; printh $name; $text =~ s/^\s*//; my @options = split(/^\s{4,6}(?=-)/m, $text); for my $option (@options) { $option =~ s/(.*)\n\s*//; my $head = $1; my $line = ""; ($head =~ s/(.*)\s+\((-\w+)\)/$1/) and $line = "\\fB$2\\fP, "; $head =~ /(\S+)\s*(.*)/; $line .= "\\fB$1\\fP"; $line .= " \\fI$2\\fP" if $2; printn '.TP'; printn $line; $option =~ s/^\s+//mg; $option =~ s/\s+$//; printn $option; } } # Generate the list of examples from the help.txt file. # sub do_examples ($$) { my $dict = shift; my $name = shift; my $text = $dict->{$name}; printh $name; my @lines = split(/^ {8}\* /m, $text); printx undent(shift @lines); for (@lines) { s/(.*)\n//; printn '.TP'; printn $1; s/^\s+//m; print $_; } } # Generate the synopsis from the help.txt file. # sub do_synopsis ($$) { my $dict = shift; my $name = shift; my $text = $dict->{$name}; printh $name; $text =~ s/([A-Z]+)/\\fI$1\\fP/g; $text =~ s/(qperf)/\\fB$1\\fP/g; $text =~ s/\n/\n.br\n/g; printx undent $text; } # Put out a sub-section that is gotten from a subsection within the Main # section of the help.txt file. # sub do_general ($$) { my $dict = shift; my $name = shift; printh $name; printx undent $dict->{$name}; } # Return the month and year that the help.txt file was last modified. # sub help_date() { my @info = stat $help_txt or panic("Cannot find $help_txt"); return POSIX::strftime "%B %Y", localtime($info[9]); } # Make a dictionary from text with the keys generated from text that is flush # against the left column and its associated data being the text that is # subsequently indented. 
# sub make_dict ($) { my %dict; my $data = shift; my @entrys = split(/^(?=\S)/m, $data); (s/^(.*)\n//) and $dict{$1} = $_ for (@entrys); return \%dict; } # Return the contents of the help.txt file. # sub read_file ($) { my $name = shift; my $file; local $/; open($file, "<", $name) or panic("Cannot find $name"); my $data = <$file>; close $file; return $data; } sub main() { my $data = read_file $help_txt; my $help = make_dict $data; my $main = make_dict undent $help->{Main}; printn '.\" Generated by mkman'; printn '.TH QPERF 1 "' . help_date . '" "qperf" "User Commands"'; printn '.SH NAME'; printn 'qperf \- Measure RDMA and IP performance'; do_synopsis ($main, 'Synopsis'); do_general ($main, 'Description'); do_examples ($help, 'Examples'); do_options ($help, 'Options'); do_tests ($help, 'Tests'); do_general ($help, 'Author'); do_general ($help, 'Bugs'); } main(); qperf-0.4.10/src/qperf.c000066400000000000000000002406661313370502100147720ustar00rootroot00000000000000/* * qperf - main. * Measure socket and RDMA performance. * * Copyright (c) 2002-2009 Johann George. All rights reserved. * Copyright (c) 2006-2009 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "qperf.h" /* * Configurable parameters. If your change makes this version of qperf * incompatible with previous versions (usually a change to the Req structure), * increment VER_MIN and set VER_INC to 0. Otherwise, just increment VER_INC. * VER_MAJ is reserved for major changes. */ #define VER_MAJ 0 /* Major version */ #define VER_MIN 4 /* Minor version */ #define VER_INC 10 /* Incremental version */ #define LISTENQ 5 /* Size of listen queue */ #define BUFSIZE 1024 /* Size of buffers */ /* * Default parameter values. */ #define DEF_TIME 2 /* Test duration */ #define DEF_TIMEOUT 5 /* Timeout */ #define DEF_PRECISION 3 /* Precision displayed */ #define DEF_LISTEN_PORT 19765 /* Listen port */ /* * Option list. */ typedef struct OPTION { char *name; /* Name of option */ char *type; /* Type */ int arg1; /* First argument */ int arg2; /* Second argument */ } OPTION; /* * Used to loop through a range of values. */ typedef struct LOOP { struct LOOP *next; /* Pointer to next loop */ OPTION *option; /* Loop variable */ long init; /* Initial value */ long last; /* Last value */ long incr; /* Increment */ int mult; /* If set, multiply, otherwise add */ } LOOP; /* * Parameter information. 
*/ typedef struct PAR_INFO { PAR_INDEX index; /* Index into parameter table */ int type; /* Type */ void *ptr; /* Pointer to value */ char *name; /* Option name */ int set; /* Parameter has been set */ int used; /* Parameter has been used */ int inuse; /* Parameter is in use */ } PAR_INFO; /* * Parameter name association. */ typedef struct PAR_NAME { char *name; /* Name */ PAR_INDEX loc_i; /* Local index */ PAR_INDEX rem_i; /* Remote index */ } PAR_NAME; /* * A simple mapping between two strings. */ typedef struct DICT { char *str1; /* String 1 */ char *str2; /* String 2 */ } DICT; /* * Test prototype. */ typedef struct TEST { char *name; /* Test name */ void (*client)(void); /* Client function */ void (*server)(void); /* Server function */ } TEST; /* * Used to save output data for formatting. */ typedef struct SHOW { char *pref; /* Name prefix */ char *name; /* Name */ char *data; /* Data */ char *unit; /* Unit */ char *altn; /* Alternative value */ } SHOW; /* * Configuration information. */ typedef struct CONF { char node[STRSIZE]; /* Node */ char cpu[STRSIZE]; /* CPU */ char os[STRSIZE]; /* Operating System */ char qperf[STRSIZE]; /* Qperf version */ } CONF; /* * Function prototypes. 
*/ static void add_ustat(USTAT *l, USTAT *r); static long arg_long(char ***argvp); static long arg_size(char ***argvp); static char *arg_strn(char ***argvp); static long arg_time(char ***argvp); static void calc_node(RESN *resn, STAT *stat); static void calc_results(void); static void client(TEST *test); static int cmpsub(char *s2, char *s1); static char *commify(char *data); static void dec_req_data(REQ *host); static void dec_req_version(REQ *host); static void dec_stat(STAT *host); static void dec_ustat(USTAT *host); static void do_args(char *args[]); static void do_loop(LOOP *loop, TEST *test); static void do_option(OPTION *option, char ***argvp); static void enc_req(REQ *host); static void enc_stat(STAT *host); static void enc_ustat(USTAT *host); static TEST *find_test(char *name); static OPTION *find_option(char *name); static void get_conf(CONF *conf); static void get_cpu(CONF *conf); static void get_times(CLOCK timex[T_N]); static void initialize(void); static void init_lstat(void); static char *loop_arg(char **pp); static int nice_1024(char *pref, char *name, long long value); static PAR_INFO *par_info(PAR_INDEX index); static PAR_INFO *par_set(char *name, PAR_INDEX index); static int par_isset(PAR_INDEX index); static void parse_loop(char ***argvp); static void place_any(char *pref, char *name, char *unit, char *data, char *altn); static void place_show(void); static void place_val(char *pref, char *name, char *unit, double value); static void remotefd_close(void); static void remotefd_setup(void); static void run_client_conf(void); static void run_client_quit(void); static void run_server_conf(void); static void run_server_quit(void); static void server(void); static void server_listen(void); static int server_recv_request(void); static void set_affinity(void); static void set_signals(void); static void show_debug(void); static void show_info(MEASURE measure); static void show_rest(void); static void show_used(void); static void sig_alrm(int signo, 
siginfo_t *siginfo, void *ucontext); static void sig_quit(int signo, siginfo_t *siginfo, void *ucontext); static void sig_urg(int signo, siginfo_t *siginfo, void *ucontext); static char *skip_colon(char *s); static void start_test_timer(int seconds); static long str_size(char *arg, char *str); static void strncopy(char *d, char *s, int n); static char *two_args(char ***argvp); static int verbose(int type, double value); static void version_error(void); static void view_band(int type, char *pref, char *name, double value); static void view_cost(int type, char *pref, char *name, double value); static void view_cpus(int type, char *pref, char *name, double value); static void view_rate(int type, char *pref, char *name, double value); static void view_long(int type, char *pref, char *name, long long value); static void view_size(int type, char *pref, char *name, long long value); static void view_strn(int type, char *pref, char *name, char *value); static void view_time(int type, char *pref, char *name, double value); /* * Configurable variables. */ static int ListenPort = DEF_LISTEN_PORT; static int Precision = DEF_PRECISION; static int ServerWait = DEF_TIMEOUT; static int UseBitsPerSec = 0; /* * Static variables. */ static REQ RReq; static STAT IStat; static int ListenFD; static LOOP *Loops; static int ProcStatFD; static STAT RStat; static int ShowIndex; static SHOW ShowTable[256]; static int UnifyUnits; static int UnifyNodes; static int VerboseConf; static int VerboseStat; static int VerboseTime; static int VerboseUsed; /* * Global variables. */ RES Res; REQ Req; STAT LStat; char *TestName; char *ServerName; SS ServerAddr; int ServerAddrLen; int RemoteFD; int Debug; volatile int Finished; /* * Parameter names. This is used to print out the names of the parameters that * have been set. 
 * Each row gives the displayed name followed by the local and the remote
 * parameter index; show_used walks this table.
 *
 * NOTE(review): L_STATIC_RATE/R_STATIC_RATE are present in ParInfo below but
 * have no row here, so show_used never lists static_rate -- confirm whether
 * that is intentional.
 */
PAR_NAME ParName[] ={
    { "access_recv", L_ACCESS_RECV, R_ACCESS_RECV },
    { "affinity", L_AFFINITY, R_AFFINITY },
    { "alt_port", L_ALT_PORT, R_ALT_PORT },
    { "flip", L_FLIP, R_FLIP },
    { "id", L_ID, R_ID },
    { "msg_size", L_MSG_SIZE, R_MSG_SIZE },
    { "mtu_size", L_MTU_SIZE, R_MTU_SIZE },
    { "no_msgs", L_NO_MSGS, R_NO_MSGS },
    { "poll_mode", L_POLL_MODE, R_POLL_MODE },
    { "port", L_PORT, R_PORT },
    { "rd_atomic", L_RD_ATOMIC, R_RD_ATOMIC },
    { "service_level", L_SL, R_SL },
    { "sock_buf_size", L_SOCK_BUF_SIZE, R_SOCK_BUF_SIZE },
    { "src_path_bits", L_SRC_PATH_BITS, R_SRC_PATH_BITS },
    { "time", L_TIME, R_TIME },
    { "timeout", L_TIMEOUT, R_TIMEOUT },
    { "use_cm", L_USE_CM, R_USE_CM },
};


/*
 * Parameters.  These must be listed in the same order as the indices are
 * defined; initialize and par_info both verify that entry i has index i.
 *
 * The second column is the value type used by show_used when printing:
 * 'l' is shown with view_long, 'p' with view_strn (the storage is a string),
 * 's' with view_size and 't' with view_time.  The third column points at the
 * storage: L_* entries live in Req and R_* entries in RReq.
 */
PAR_INFO ParInfo[P_N] ={
    { P_NULL, },
    { L_ACCESS_RECV, 'l', &Req.access_recv },
    { R_ACCESS_RECV, 'l', &RReq.access_recv },
    { L_AFFINITY, 'l', &Req.affinity },
    { R_AFFINITY, 'l', &RReq.affinity },
    { L_ALT_PORT, 'l', &Req.alt_port },
    { R_ALT_PORT, 'l', &RReq.alt_port },
    { L_FLIP, 'l', &Req.flip },
    { R_FLIP, 'l', &RReq.flip },
    { L_ID, 'p', &Req.id },
    { R_ID, 'p', &RReq.id },
    { L_MSG_SIZE, 's', &Req.msg_size },
    { R_MSG_SIZE, 's', &RReq.msg_size },
    { L_MTU_SIZE, 's', &Req.mtu_size },
    { R_MTU_SIZE, 's', &RReq.mtu_size },
    { L_NO_MSGS, 'l', &Req.no_msgs },
    { R_NO_MSGS, 'l', &RReq.no_msgs },
    { L_POLL_MODE, 'l', &Req.poll_mode },
    { R_POLL_MODE, 'l', &RReq.poll_mode },
    { L_PORT, 'l', &Req.port },
    { R_PORT, 'l', &RReq.port },
    { L_RD_ATOMIC, 'l', &Req.rd_atomic },
    { R_RD_ATOMIC, 'l', &RReq.rd_atomic },
    { L_SL, 'l', &Req.sl },
    { R_SL, 'l', &RReq.sl },
    { L_SOCK_BUF_SIZE, 's', &Req.sock_buf_size },
    { R_SOCK_BUF_SIZE, 's', &RReq.sock_buf_size },
    { L_SRC_PATH_BITS, 's', &Req.src_path_bits },
    { R_SRC_PATH_BITS, 's', &RReq.src_path_bits },
    { L_STATIC_RATE, 'p', &Req.static_rate },
    { R_STATIC_RATE, 'p', &RReq.static_rate },
    { L_TIME, 't', &Req.time },
    { R_TIME, 't', &RReq.time },
    { L_TIMEOUT, 't', &Req.timeout },
    { R_TIMEOUT, 't', &RReq.timeout },
    { L_USE_CM, 'l', &Req.use_cm },
    { R_USE_CM, 'l', &RReq.use_cm },
};


/*
 * Renamed options.  First is old, second is new.  find_option prints an
 * "obsolete option" warning and then continues with the new spelling.
 */
DICT Renamed[] = {
    /* -a becomes -ca (--cpu_affinity) */
    { "--affinity", "--cpu_affinity" },
    { "-a", "-ca" },
    { "--loc_affinity", "--loc_cpu_affinity" },
    { "-la", "-lca" },
    { "--rem_affinity", "--rem_cpu_affinity" },
    { "-ra", "-rca" },
    /* -r becomes -sr (--static_rate) */
    { "--rate", "--static_rate" },
    { "-r", "-sr" },
    { "--loc_rate", "--loc_static_rate" },
    { "-lr", "-lsr" },
    { "--rem_rate", "--rem_static_rate" },
    { "-rr", "-rsr" },
    /* -p becomes -ip (--ip_port) */
    { "--port", "--ip_port" },
    { "-p", "-ip" },
    /* -P becomes -cp (--cq_poll) */
    { "--poll", "--cq_poll" },
    { "-P", "-cp" },
    { "--loc_poll", "--loc_cq_poll" },
    { "-lP", "-lcp" },
    { "--rem_poll", "--rem_cq_poll" },
    { "-rP", "-rcp" },
    /* -R becomes -nr (--rd_atomic) */
    { "-R", "-nr" },
    { "-lR", "-lnr" },
    { "-rR", "-rnr" },
    /* -T becomes -to (--timeout) */
    { "-T", "-to" },
    { "-lT", "-lto" },
    { "-rT", "-rto" },
    /* -S becomes -sb (--sock_buf_size) */
    { "-S", "-sb" },
    { "-lS", "-lsb" },
    { "-rS", "-rsb" },
    /* -W becomes -ws (--wait_server) */
    { "--wait", "--wait_server" },
    { "-W", "-ws" },
    /* verbose options */
    { "-vC", "-vvc", },
    { "-vS", "-vvs", },
    { "-vT", "-vvt", },
    { "-vU", "-vvu", },
    /* options that are on */
    { "-aro", "-ar1" },
    { "-cmo", "-cm1" },
    { "-fo", "-f1" },
    { "-cpo", "-cp1" },
    { "-lcpo", "-lcp1" },
    { "-rcpo", "-rcp1" },
    /* miscellaneous */
    { "-Ar", "-ar" },
    { "-M", "-mt" },
    { "-u", "-uu", },
};


/*
 * Options.  The type field (2nd column) is used by do_option.  If it begins
 * with an S, it is a valid server option.  If it begins with an X, it is
 * obsolete and will eventually go away.
 *
 * The 3rd and 4th columns are the parameter indices that do_option stores
 * the value into; an omitted column is zero and is ignored by par_set.
 * parse_loop looks loop variables up in this table by name with the leading
 * dashes removed.
 */
OPTION Options[] ={
    { "--access_recv", "int", L_ACCESS_RECV, R_ACCESS_RECV },
    { "-ar", "int", L_ACCESS_RECV, R_ACCESS_RECV },
    { "-ar1", "set1", L_ACCESS_RECV, R_ACCESS_RECV },
    { "--alt_port", "int", L_ALT_PORT, R_ALT_PORT },
    { "-ap", "int", L_ALT_PORT, R_ALT_PORT },
    { "--loc_alt_port", "int", L_ALT_PORT, },
    { "-lap", "int", L_ALT_PORT, },
    { "--rem_alt_port", "int", R_ALT_PORT },
    { "-rap", "int", R_ALT_PORT },
    { "--cpu_affinity", "int", L_AFFINITY, R_AFFINITY },
    { "-ca", "int", L_AFFINITY, R_AFFINITY },
    { "--loc_cpu_affinity", "int", L_AFFINITY, },
    { "-lca", "int", L_AFFINITY, },
    { "--rem_cpu_affinity", "int", R_AFFINITY },
    { "-rca", "int", R_AFFINITY },
    { "--debug", "Sdebug", },
    { "-D", "Sdebug", },
    { "--flip", "int", L_FLIP, R_FLIP },
    { "-f", "int", L_FLIP, R_FLIP },
    { "-f1", "set1", L_FLIP, R_FLIP },
    { "--help", "help" },
    { "-h", "help" },
    { "--host", "host", },
    { "-H", "host", },
    { "--id", "str", L_ID, R_ID },
    { "-i", "str", L_ID, R_ID },
    { "--loc_id", "str", L_ID, },
    { "-li", "str", L_ID, },
    { "--rem_id", "str", R_ID },
    { "-ri", "str", R_ID },
    { "--listen_port", "Slp", },
    { "-lp", "Slp", },
    { "--loop", "loop", },
    { "-oo", "loop", },
    { "--msg_size", "size", L_MSG_SIZE, R_MSG_SIZE },
    { "-m", "size", L_MSG_SIZE, R_MSG_SIZE },
    { "--mtu_size", "size", L_MTU_SIZE, R_MTU_SIZE },
    { "-mt", "size", L_MTU_SIZE, R_MTU_SIZE },
    { "--no_msgs", "int", L_NO_MSGS, R_NO_MSGS },
    { "-n", "int", L_NO_MSGS, R_NO_MSGS },
    { "--cq_poll", "int", L_POLL_MODE, R_POLL_MODE },
    { "-cp", "int", L_POLL_MODE, R_POLL_MODE },
    { "-cp1", "set1", L_POLL_MODE, R_POLL_MODE },
    { "--loc_cq_poll", "int", L_POLL_MODE, },
    { "-lcp", "int", L_POLL_MODE, },
    { "-lcp1", "set1", L_POLL_MODE },
    { "--rem_cq_poll", "int", R_POLL_MODE },
    { "-rcp", "int", R_POLL_MODE },
    { "-rcp1", "set1", R_POLL_MODE },
    { "--ip_port", "int", L_PORT, R_PORT },
    { "-ip", "int", L_PORT, R_PORT },
    { "--precision", "precision", },
    { "-e", "precision", },
    { "--rd_atomic", "int", L_RD_ATOMIC, R_RD_ATOMIC },
    { "-nr", "int", L_RD_ATOMIC, R_RD_ATOMIC },
    { "--loc_rd_atomic", "int", L_RD_ATOMIC, },
    { "-lnr", "int", L_RD_ATOMIC, },
    { "--rem_rd_atomic", "int", R_RD_ATOMIC },
    { "-rnr", "int", R_RD_ATOMIC },
    { "--service_level", "sl", L_SL, R_SL },
    { "-sl", "sl", L_SL, R_SL },
    { "--loc_service_level", "sl", L_SL },
    { "-lsl", "sl", L_SL },
    { "--rem_service_level", "sl", R_SL },
    { "-rsl", "sl", R_SL },
    { "--sock_buf_size", "size", L_SOCK_BUF_SIZE, R_SOCK_BUF_SIZE },
    { "-sb", "size", L_SOCK_BUF_SIZE, R_SOCK_BUF_SIZE },
    { "--loc_sock_buf_size", "size", L_SOCK_BUF_SIZE },
    { "-lsb", "size", L_SOCK_BUF_SIZE },
    { "--rem_sock_buf_size", "size", R_SOCK_BUF_SIZE },
    { "-rsb", "size", R_SOCK_BUF_SIZE },
    { "--src_path_bits", "size", L_SRC_PATH_BITS, R_SRC_PATH_BITS },
    { "-sp", "size", L_SRC_PATH_BITS, R_SRC_PATH_BITS },
    { "--loc_src_path_bits", "size", L_SRC_PATH_BITS },
    { "-lsp", "size", L_SRC_PATH_BITS },
    { "--rem_src_path_bits", "size", R_SRC_PATH_BITS },
    { "-rsp", "size", R_SRC_PATH_BITS },
    { "--static_rate", "str", L_STATIC_RATE, R_STATIC_RATE },
    { "-sr", "str", L_STATIC_RATE, R_STATIC_RATE },
    { "--loc_static_rate", "str", L_STATIC_RATE },
    { "-lsr", "str", L_STATIC_RATE },
    { "--rem_static_rate", "str", R_STATIC_RATE },
    { "-rsr", "str", R_STATIC_RATE },
    { "--time", "time", L_TIME, R_TIME },
    { "-t", "time", L_TIME, R_TIME },
    { "--timeout", "time", L_TIMEOUT, R_TIMEOUT },
    { "-to", "time", L_TIMEOUT, R_TIMEOUT },
    { "--loc_timeout", "Stime", L_TIMEOUT },
    { "-lto", "Stime", L_TIMEOUT },
    { "--rem_timeout", "time", R_TIMEOUT },
    { "-rto", "time", R_TIMEOUT },
    { "--unify_nodes", "un", },
    { "-un", "un", },
    { "--unify_units", "uu", },
    { "-uu", "uu", },
    { "--use_bits_per_sec", "ub", },
    { "-ub", "ub", },
    { "--use_cm", "int", L_USE_CM, R_USE_CM },
    { "-cm", "int", L_USE_CM, R_USE_CM },
    { "-cm1", "set1", L_USE_CM, R_USE_CM },
    { "--verbose", "v", },
    { "-v", "v", },
    { "--verbose_conf", "vc", },
    { "-vc", "vc", },
    { "--verbose_stat", "vs", },
    { "-vs", "vs", },
    { "--verbose_time", "vt", },
    { "-vt", "vt", },
    { "--verbose_used", "vu", },
    { "-vu", "vu", },
    { "--verbose_more", "vv", },
    { "-vv", "vv", },
    { "--verbose_more_conf", "vvc", },
    { "-vvc", "vvc", },
    { "--verbose_more_stat", "vvs", },
    { "-vvs", "vvs", },
    { "--verbose_more_time", "vvt", },
    { "-vvt", "vvt", },
    { "--verbose_more_used", "vvu", },
    { "-vvu", "vvu", },
    { "--version", "version", },
    { "-V", "version", },
    { "--wait_server", "wait", },
    { "-ws", "wait", },
};


/*
 * Tests.  test(n) expands to the test's name followed by its client and
 * server entry points, run_client_n and run_server_n.  The position of a test
 * in this table is what the client sends as the request index (see client and
 * server), so both ends must be built with the same table.
 */
#define test(n) { #n, run_client_##n, run_server_##n }
TEST Tests[] ={
    test(conf),
    test(quit),
    test(rds_bw),
    test(rds_lat),
    test(sctp_bw),
    test(sctp_lat),
    test(sdp_bw),
    test(sdp_lat),
    test(tcp_bw),
    test(tcp_lat),
    test(udp_bw),
    test(udp_lat),
#ifdef RDMA
    test(rc_bi_bw),
    test(rc_bw),
    test(rc_compare_swap_mr),
    test(rc_fetch_add_mr),
    test(rc_lat),
    test(rc_rdma_read_bw),
    test(rc_rdma_read_lat),
    test(rc_rdma_write_bw),
    test(rc_rdma_write_lat),
    test(rc_rdma_write_poll_lat),
    test(uc_bi_bw),
    test(uc_bw),
    test(uc_lat),
    test(uc_rdma_write_bw),
    test(uc_rdma_write_lat),
    test(uc_rdma_write_poll_lat),
    test(ud_bi_bw),
    test(ud_bw),
    test(ud_lat),
    test(ver_rc_compare_swap),
    test(ver_rc_fetch_add),
#ifdef HAS_XRC
    test(xrc_bi_bw),
    test(xrc_bw),
    test(xrc_lat),
#endif /* HAS_XRC */
#endif
};


/*
 * Program entry point: set up state and signal handlers, then let do_args
 * interpret the command line (everything after the program name).  do_args
 * runs the tests or the server itself, so there is nothing left to do when it
 * returns.
 */
int
main(int argc, char *argv[])
{
    initialize();
    set_signals();
    do_args(&argv[1]);
    return 0;
}


/*
 * Initialize variables.  Marks the control connection as closed, checks that
 * every ParInfo entry sits at the position given by its index, opens
 * /proc/stat and records the CPU count and clock tick rate in IStat.
 */
static void
initialize(void)
{
    int i;

    RemoteFD = -1;
    for (i = 0; i < P_N; ++i)
        if (ParInfo[i].index != i)
            error(BUG, "initialize: ParInfo: out of order: %d", i);
    /* The 0 passed as the open flags is O_RDONLY on Linux. */
    ProcStatFD = open("/proc/stat", 0);
    if (ProcStatFD < 0)
        error(SYS, "cannot open /proc/stat");
    IStat.no_cpus = sysconf(_SC_NPROCESSORS_ONLN);
    IStat.no_ticks = sysconf(_SC_CLK_TCK);
}


/*
 * Look for a colon and skip past it and any spaces.  Returns a pointer to the
 * first character after the colon that is not a space, or 0 if the string
 * contains no colon.
 */
static char *
skip_colon(char *s)
{
    for (;;) {
        int c = *s++;
        if (c == ':')
            break;
        if (c == '\0')
            return 0;
    }
    while (*s == ' ')
        s++;
    return s;
}


/*
 * A case insensitive string compare.  s2 must at least contain all of s1 but
 * can be longer.
*/ static int cmpsub(char *s2, char *s1) { for (;;) { int c1 = *s1++; int c2 = *s2++; if (c1 == '\0') return 1; if (c2 == '\0') return 0; if (tolower(c1) != tolower(c2)) return 0; } } /* * Set up signal handlers. */ static void set_signals(void) { struct sigaction act ={ .sa_flags = SA_SIGINFO }; act.sa_sigaction = sig_alrm; sigaction(SIGALRM, &act, 0); sigaction(SIGPIPE, &act, 0); act.sa_sigaction = sig_quit; sigaction(SIGQUIT, &act, 0); act.sa_sigaction = sig_urg; sigaction(SIGURG, &act, 0); } /* * Note that time is up. */ static void sig_alrm(int signo, siginfo_t *siginfo, void *ucontext) { set_finished(); } /* * Our child sends us a quit when it wishes us to exit. */ static void sig_quit(int signo, siginfo_t *siginfo, void *ucontext) { exit(0); } /* * Called when a TCP/IP out-of-band message is received. */ static void sig_urg(int signo, siginfo_t *siginfo, void *ucontext) { urgent(); } /* * Parse arguments. */ static void do_args(char *args[]) { int isClient = 0; int testSpecified = 0; while (*args) { char *arg = *args; if (arg[0] == '-') { OPTION *option = find_option(arg); if (!option) error(0, "%s: bad option; try: qperf --help options", arg); if (option->type[0] != 'S') isClient = 1; do_option(option, &args); } else { isClient = 1; if (!ServerName) ServerName = arg; else { TEST *test = find_test(arg); if (!test) error(0, "%s: bad test; try: qperf --help tests", arg); do_loop(Loops, test); testSpecified = 1; } ++args; } } if (!isClient) server(); else if (!testSpecified) { if (!ServerName) error(0, "you used a client-only option but did not specify the " "server name.\nDo you want to be a client or server?"); if (find_test(ServerName)) error(0, "must specify host name first; try: qperf --help"); error(0, "must specify a test type; try: qperf --help"); } } /* * Loop through a series of tests. 
*/ static void do_loop(LOOP *loop, TEST *test) { if (!loop) client(test); else { long l = loop->init; while (l <= loop->last) { char buf[64]; char *args[2] = {loop->option->name, buf}; char **argv = args; snprintf(buf, sizeof(buf), "%ld", l); do_option(loop->option, &argv); do_loop(loop->next, test); if (loop->mult) l *= loop->incr; else l += loop->incr; } } } /* * Given the name of an option, find it. */ static OPTION * find_option(char *name) { int n; DICT *d; OPTION *p; n = cardof(Renamed); d = Renamed; for (; n--; ++d) { if (streq(name, d->str1)) { char *msg = "warning: obsolete option: %s; use %s instead"; error(RET, msg, name, d->str2); name = d->str2; break; } } n = cardof(Options); p = Options; for (; n--; ++p) if (streq(name, p->name)) return p; return 0; } /* * Given the name of a test, find it. */ static TEST * find_test(char *name) { int n = cardof(Tests); TEST *p = Tests; for (; n--; ++p) if (streq(name, p->name)) return p; return 0; } /* * Handle options. */ static void do_option(OPTION *option, char ***argvp) { char *t = option->type; if (*t == 'S') ++t; if (streq(t, "debug")) { Debug = 1; *argvp += 1; } else if (streq(t, "help")) { /* Help */ char **usage; char *category = (*argvp)[1]; if (!category) category = "main"; for (usage = Usage; *usage; usage += 2) if (streq(*usage, category)) break; if (!*usage) { error(0, "cannot find help category %s; try: qperf --help categories", category); } printf("%s", usage[1]); exit(0); } else if (streq(t, "host")) { ServerName = arg_strn(argvp); } else if (streq(t, "int")) { long v = arg_long(argvp); setp_u32(option->name, option->arg1, v); setp_u32(option->name, option->arg2, v); } else if (streq(t, "loop")) { parse_loop(argvp); } else if (streq(t, "lp")) { ListenPort = arg_long(argvp); } else if (streq(t, "precision")) { Precision = arg_long(argvp); } else if (streq(t, "set1")) { setp_u32(option->name, option->arg1, 1); setp_u32(option->name, option->arg2, 1); *argvp += 1; } else if (streq(t, "size")) { long v 
= arg_size(argvp); setp_u32(option->name, option->arg1, v); setp_u32(option->name, option->arg2, v); } else if (streq(t, "sl")) { long v = arg_long(argvp); if (v < 0 || v > 15) error(0, "service level must be between 0 and 15: %d given", v); setp_u32(option->name, option->arg1, v); setp_u32(option->name, option->arg2, v); } else if (streq(t, "str")) { char *s = arg_strn(argvp); setp_str(option->name, option->arg1, s); setp_str(option->name, option->arg2, s); } else if (streq(t, "time")) { long v = arg_time(argvp); setp_u32(option->name, option->arg1, v); setp_u32(option->name, option->arg2, v); } else if (streq(t, "ub")) { UseBitsPerSec = 1; *argvp += 1; } else if (streq(t, "un")) { UnifyNodes = 1; *argvp += 1; } else if (streq(t, "uu")) { UnifyUnits = 1; *argvp += 1; } else if (streq(t, "v")) { if (VerboseConf < 1) VerboseConf = 1; if (VerboseStat < 1) VerboseStat = 1; if (VerboseTime < 1) VerboseTime = 1; if (VerboseUsed < 1) VerboseUsed = 1; *argvp += 1; } else if (streq(t, "vc")) { VerboseConf = 1; *argvp += 1; } else if (streq(t, "version")) { printf("qperf %d.%d.%d\n", VER_MAJ, VER_MIN, VER_INC); exit(0); } else if (streq(t, "vs")) { VerboseStat = 1; *argvp += 1; } else if (streq(t, "vt")) { VerboseTime = 1; *argvp += 1; } else if (streq(t, "vu")) { VerboseUsed = 1; *argvp += 1; } else if (streq(t, "vv")) { VerboseConf = 2; VerboseStat = 2; VerboseTime = 2; VerboseUsed = 2; *argvp += 1; } else if (streq(t, "vvc")) { VerboseConf = 2; *argvp += 1; } else if (streq(t, "vvs")) { VerboseStat = 2; *argvp += 1; } else if (streq(t, "vvt")) { VerboseTime = 2; *argvp += 1; } else if (streq(t, "vvu")) { VerboseUsed = 2; *argvp += 1; } else if (streq(t, "wait")) { ServerWait = arg_time(argvp); } else error(BUG, "do_option: unknown type: %s", t); } /* * Parse a loop option. 
*/ static void parse_loop(char ***argvp) { char *opt = **argvp; char *s = two_args(argvp); char *name = loop_arg(&s); char *init = loop_arg(&s); char *last = loop_arg(&s); char *incr = loop_arg(&s); LOOP *loop = qmalloc(sizeof(LOOP)); memset(loop, 0, sizeof(*loop)); /* Parse variable name */ { int n = cardof(Options); OPTION *p = Options; if (!name) name = "msg_size"; for (;;) { char *s = p->name; if (n-- == 0) error(0, "%s: %s: no such variable", opt, name); if (*s++ != '-') continue; if (*s == '-') s++; if (streq(name, s)) break; p++; } loop->option = p; } /* Parse increment */ if (!incr) loop->incr = 0; else { if (incr[0] == '*') { incr++; loop->mult = 1; } loop->incr = str_size(incr, opt); if (loop->incr < 1) error(0, "%s: %s: increment must be positive", opt, incr); } /* Parse initial value */ if (init) loop->init = str_size(init, opt); else loop->init = loop->mult ? 1 : 0; /* Parse last value */ if (!last) error(0, "%s: must specify limit", opt); loop->last = str_size(last, opt); /* Insert into loop list */ if (!Loops) Loops = loop; else { LOOP *l = Loops; while (l->next) l = l->next; l->next = loop; } } /* * Given a string consisting of arguments separated by colons, return the next * argument and prepare for scanning the next one. */ static char * loop_arg(char **pp) { char *a = *pp; char *p = a; while (*p) { if (*p == ':') { *p = '\0'; *pp = p + 1; break; } ++p; } return a[0] ? a : 0; } /* * Ensure that two arguments exist. */ static char * two_args(char ***argvp) { char **argv = *argvp; if (!argv[1]) error(0, "%s: missing argument", argv[0]); *argvp += 2; return argv[1]; } /* * Return the value of a long argument. It must be non-negative. 
*/ static long arg_long(char ***argvp) { char **argv = *argvp; char *p; long l; if (!argv[1]) error(0, "missing argument to %s", argv[0]); l = strtol(argv[1], &p, 10); if (p[0] != '\0') error(0, "bad argument: %s", argv[1]); if (l < 0) error(0, "%s requires a non-negative number", argv[0]); *argvp += 2; return l; } /* * Return the value of a size argument. */ static long arg_size(char ***argvp) { long l; char **argv = *argvp; *argvp += 2; if (!argv[1]) error(0, "missing argument to %s", argv[0]); l = str_size(argv[1], argv[0]); if (l < 0) error(0, "%s requires a non-negative number", argv[0]); return l; } /* * Scan a size argument from a string. */ static long str_size(char *str, char *arg) { char *p; long m = 1; long double d = strtold(str, &p); if (p[0] == '\0') m = 1; else if (streq(p, "kb") || streq(p, "k")) m = 1000; else if (streq(p, "mb") || streq(p, "m")) m = 1000 * 1000; else if (streq(p, "gb") || streq(p, "g")) m = 1000 * 1000 * 1000; else if (streq(p, "kib") || streq(p, "K")) m = 1024; else if (streq(p, "mib") || streq(p, "M")) m = 1024 * 1024; else if (streq(p, "gib") || streq(p, "G")) m = 1024 * 1024 * 1024; else error(0, "%s: bad size: %s", arg, str); return d * m; } /* * Return the value of a string argument. */ static char * arg_strn(char ***argvp) { char **argv = *argvp; if (!argv[1]) error(0, "missing argument to %s", argv[0]); *argvp += 2; return argv[1]; } /* * Return the value of a size argument. 
*/ static long arg_time(char ***argvp) { char *p; long double d; long l = 0; char **argv = *argvp; if (!argv[1]) error(0, "missing argument to %s", argv[0]); d = strtold(argv[1], &p); if (d < 0) error(0, "%s requires a non-negative number", argv[0]); if (p[0] == '\0') l = (long)d; else { int u = *p; if (p[1] != '\0') error(0, "bad argument: %s", argv[1]); if (u == 's' || u == 'S') l = (long)d; else if (u == 'm' || u == 'M') l = (long)(d * (60)); else if (u == 'h' || u == 'H') l = (long)(d * (60 * 60)); else if (u == 'd' || u == 'D') l = (long)(d * (60 * 60 * 24)); else error(0, "bad argument: %s", argv[1]); } *argvp += 2; return l; } /* * Set a value stored in a 32 bit value without letting anyone know we set it. */ void setv_u32(PAR_INDEX index, uint32_t l) { PAR_INFO *p = par_info(index); *((uint32_t *)p->ptr) = l; } /* * Set an option stored in a 32 bit value. */ void setp_u32(char *name, PAR_INDEX index, uint32_t l) { PAR_INFO *p = par_set(name, index); if (!p) return; *((uint32_t *)p->ptr) = l; } /* * Set an option stored in a string vector. */ void setp_str(char *name, PAR_INDEX index, char *s) { PAR_INFO *p = par_set(name, index); if (!p) return; if (strlen(s) >= STRSIZE) error(0, "%s: too long", s); strcpy(p->ptr, s); } /* * Note a parameter as being used. */ void par_use(PAR_INDEX index) { PAR_INFO *p = par_info(index); p->used = 1; p->inuse = 1; } /* * Set the PAR_INFO.name value. */ static PAR_INFO * par_set(char *name, PAR_INDEX index) { PAR_INFO *p = par_info(index); if (index == P_NULL) return 0; if (name) { p->name = name; p->set = 1; } else { p->used = 1; p->inuse = 1; if (p->name) return 0; } return p; } /* * Determine if a parameter is set. */ static int par_isset(PAR_INDEX index) { return par_info(index)->name != 0; } /* * Index the ParInfo table. 
*/ static PAR_INFO * par_info(PAR_INDEX index) { PAR_INFO *p = &ParInfo[index]; if (index != p->index) error(BUG, "par_info: table out of order: %d != %d", index, p-index); return p; } /* * If any options were set but were not used, print out a warning message for * the user. */ void opt_check(void) { PAR_INFO *p; PAR_INFO *q; PAR_INFO *r = endof(ParInfo); for (p = ParInfo; p < r; ++p) { if (p->used || !p->set) continue; error(RET, "warning: %s set but not used in test %s", p->name, TestName); for (q = p+1; q < r; ++q) if (q->set && q->name == p->name) q->set = 0; } } /* * Server. */ static void server(void) { server_listen(); for (;;) { REQ req; pid_t pid; TEST *test; int s = offset(REQ, req_index); debug("ready for requests"); if (!server_recv_request()) continue; pid = fork(); if (pid < 0) { error(SYS|RET, "fork failed"); continue; } if (pid > 0) { remotefd_close(); waitpid(pid, 0, 0); continue; } remotefd_setup(); recv_mesg(&req, s, "request version"); dec_init(&req); dec_req_version(&Req); if (Req.ver_maj != VER_MAJ || Req.ver_min != VER_MIN) version_error(); recv_mesg(&req.req_index, sizeof(req)-s, "request data"); dec_req_data(&Req); if (Req.req_index >= cardof(Tests)) error(0, "bad request index: %d", Req.req_index); test = &Tests[Req.req_index]; TestName = test->name; debug("received request: %s", TestName); init_lstat(); set_affinity(); (test->server)(); exit(0); } close(ListenFD); } /* * If there is a version mismatch of qperf between the client and server, tell * the user which needs to be upgraded. 
*/ static void version_error(void) { int hi_maj = Req.ver_maj; int hi_min = Req.ver_min; int hi_inc = Req.ver_inc; int lo_maj = VER_MAJ; int lo_min = VER_MIN; int lo_inc = VER_INC; char *msg = "upgrade qperf on %s from %d.%d.%d to %d.%d.%d"; char *low = "server"; if (lo_maj > hi_maj || (lo_maj == hi_maj && lo_min > hi_min)) { hi_maj = VER_MAJ; hi_min = VER_MIN; hi_inc = VER_INC; lo_maj = Req.ver_maj; lo_min = Req.ver_min; lo_inc = Req.ver_inc; low = "client"; } error(0, msg, low, lo_maj, lo_min, lo_inc, hi_maj, hi_min, hi_inc); } /* * Listen for any requests. */ static void server_listen(void) { AI *ai; AI hints ={ .ai_flags = AI_PASSIVE | AI_NUMERICSERV, .ai_family = AF_UNSPEC, .ai_socktype = SOCK_STREAM }; AI *ailist = getaddrinfo_port(0, ListenPort, &hints); for (ai = ailist; ai; ai = ai->ai_next) { ListenFD = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); if (ListenFD < 0) continue; setsockopt_one(ListenFD, SO_REUSEADDR); if (bind(ListenFD, ai->ai_addr, ai->ai_addrlen) == SUCCESS0) break; close(ListenFD); } freeaddrinfo(ailist); if (!ai) error(0, "unable to bind to listen port"); if (!Req.timeout) Req.timeout = DEF_TIMEOUT; if (listen(ListenFD, LISTENQ) < 0) error(SYS, "listen failed"); } /* * Accept a request from a client. */ static int server_recv_request(void) { socklen_t clientLen; struct sockaddr_in clientAddr; clientLen = sizeof(clientAddr); RemoteFD = accept(ListenFD, (struct sockaddr *)&clientAddr, &clientLen); if (RemoteFD < 0) return error(SYS|RET, "accept failed"); return 1; } /* * Client. 
*/ static void client(TEST *test) { int i; for (i = 0; i < P_N; ++i) ParInfo[i].inuse = 0; if (!par_isset(L_NO_MSGS)) setp_u32(0, L_TIME, DEF_TIME); if (!par_isset(R_NO_MSGS)) setp_u32(0, R_TIME, DEF_TIME); setp_u32(0, L_TIMEOUT, DEF_TIMEOUT); setp_u32(0, R_TIMEOUT, DEF_TIMEOUT); par_use(L_AFFINITY); par_use(R_AFFINITY); par_use(L_TIME); par_use(R_TIME); set_affinity(); RReq.ver_maj = VER_MAJ; RReq.ver_min = VER_MIN; RReq.ver_inc = VER_INC; RReq.req_index = test - Tests; TestName = test->name; debug("sending request: %s", TestName); init_lstat(); printf("%s:\n", TestName); (*test->client)(); remotefd_close(); place_show(); } /* * Send a request to the server. */ void client_send_request(void) { REQ req; AI *a; AI hints ={ .ai_family = AF_UNSPEC, .ai_socktype = SOCK_STREAM }; AI *ailist = getaddrinfo_port(ServerName, ListenPort, &hints); RemoteFD = -1; if (ServerWait) start_test_timer(ServerWait); for (;;) { for (a = ailist; a; a = a->ai_next) { if (Finished) break; RemoteFD = socket(a->ai_family, a->ai_socktype, a->ai_protocol); if (RemoteFD < 0) continue; if (connect(RemoteFD, a->ai_addr, a->ai_addrlen) != SUCCESS0) { remotefd_close(); continue; } ServerAddrLen = a->ai_addrlen; memcpy(&ServerAddr, a->ai_addr, ServerAddrLen); break; } if (RemoteFD >= 0 || !ServerWait || Finished) break; sleep(1); } if (ServerWait) stop_test_timer(); freeaddrinfo(ailist); if (RemoteFD < 0) error(0, "%s: failed to connect", ServerName); remotefd_setup(); enc_init(&req); enc_req(&RReq); send_mesg(&req, sizeof(req), "request data"); } /* * Configure the remote file descriptor. */ static void remotefd_setup(void) { int one = 1; if (ioctl(RemoteFD, FIONBIO, &one) < 0) error(SYS, "ioctl FIONBIO failed"); if (fcntl(RemoteFD, F_SETOWN, getpid()) < 0) error(SYS, "fcntl F_SETOWN failed"); } /* * Close the remote file descriptor. * Set a file descriptor to non-blocking. */ static void remotefd_close(void) { close(RemoteFD); RemoteFD = -1; } /* * Exchange results. 
We sync up only to ensure that the client is out of its * loop so we can close our socket or whatever communication medium we are * using. */ void exchange_results(void) { STAT stat; if (is_client()) { recv_mesg(&stat, sizeof(stat), "results"); dec_init(&stat); dec_stat(&RStat); send_sync("synchronization after test"); } else { enc_init(&stat); enc_stat(&LStat); send_mesg(&stat, sizeof(stat), "results"); recv_sync("synchronization after test"); } } /* * Initialize local status information. */ static void init_lstat(void) { memcpy(&LStat, &IStat, sizeof(LStat)); } /* * Show configuration (client side). */ static void run_client_conf(void) { CONF lconf; CONF rconf; client_send_request(); recv_mesg(&rconf, sizeof(rconf), "configuration"); get_conf(&lconf); view_strn('a', "", "loc_node", lconf.node); view_strn('a', "", "loc_cpu", lconf.cpu); view_strn('a', "", "loc_os", lconf.os); view_strn('a', "", "loc_qperf", lconf.qperf); view_strn('a', "", "rem_node", rconf.node); view_strn('a', "", "rem_cpu", rconf.cpu); view_strn('a', "", "rem_os", rconf.os); view_strn('a', "", "rem_qperf", rconf.qperf); } /* * Show configuration (server side). */ static void run_server_conf(void) { CONF conf; get_conf(&conf); send_mesg(&conf, sizeof(conf), "configuration"); } /* * Get configuration. */ static void get_conf(CONF *conf) { struct utsname utsname; uname(&utsname); strncopy(conf->node, utsname.nodename, sizeof(conf->node)); snprintf(conf->os, sizeof(conf->os), "%s %s", utsname.sysname, utsname.release); get_cpu(conf); snprintf(conf->qperf, sizeof(conf->qperf), "%d.%d.%d", VER_MAJ, VER_MIN, VER_INC); } /* * Get CPU information. 
*/ static void get_cpu(CONF *conf) { char count[STRSIZE]; char speed[STRSIZE]; char buf[BUFSIZE]; char cpu[BUFSIZE]; char mhz[BUFSIZE]; int cpus = 0; int mixed = 0; FILE *fp = fopen("/proc/cpuinfo", "r"); if (!fp) error(0, "cannot open /proc/cpuinfo"); cpu[0] = '\0'; mhz[0] = '\0'; while (fgets(buf, sizeof(buf), fp)) { int n = strlen(buf); if (cmpsub(buf, "model name")) { ++cpus; if (!mixed) { if (cpu[0] == '\0') strncopy(cpu, buf, sizeof(cpu)); else if (!streq(buf, cpu)) mixed = 1; } } else if (cmpsub(buf, "cpu MHz")) { if (!mixed) { if (mhz[0] == '\0') strncopy(mhz, buf, sizeof(mhz)); else if (!streq(buf, mhz)) mixed = 1; } } while (n && buf[n-1] != '\n') { if (!fgets(buf, sizeof(buf), fp)) break; n = strlen(buf); } } fclose(fp); /* CPU name */ if (mixed) strncopy(cpu, "Mixed CPUs", sizeof(cpu)); else { char *p = cpu; char *q = skip_colon(cpu); if (!q) return; for (;;) { if (*q == '(' && cmpsub(q, "(r)")) q += 3; else if (*q == '(' && cmpsub(q, "(tm)")) q += 4; if (tolower(*q) == 'c' && cmpsub(q, "cpu ")) q += 4; if (tolower(*q) == 'p' && cmpsub(q, "processor ")) q += 10; else if (q[0] == ' ' && q[1] == ' ') q += 1; else if (q[0] == '\n') q += 1; else if (!(*p++ = *q++)) break; } } /* CPU speed */ speed[0] = '\0'; if (!mixed) { int n = strlen(cpu); if (n < 3 || cpu[n-2] != 'H' || cpu[n-1] != 'z') { char *q = skip_colon(mhz); if (q) { int freq = atoi(q); if (freq < 1000) snprintf(speed, sizeof(speed), " %dMHz", freq); else snprintf(speed, sizeof(speed), " %.1fGHz", freq/1000.0); } } } /* Number of CPUs */ if (cpus == 1) count[0] = '\0'; else snprintf(count, sizeof(count), "%d Cores: ", cpus); snprintf(conf->cpu, sizeof(conf->cpu), "%s%s%s", count, cpu, speed); } /* * Quit (client side). */ static void run_client_quit(void) { opt_check(); client_send_request(); sync_test(); exit(0); } /* * Quit (server side). The read is to ensure that the client first quits to * ensure that everything closes down cleanly. 
*/ static void run_server_quit(void) { char buf[1]; sync_test(); (void) read(RemoteFD, buf, sizeof(buf)); kill(getppid(), SIGQUIT); exit(0); } /* * Synchronize the client and server. */ void sync_test(void) { synchronize("synchronization before test"); start_test_timer(Req.time); } /* * Start test timer. */ static void start_test_timer(int seconds) { struct itimerval itimerval = {{0}}; Finished = 0; get_times(LStat.time_s); setitimer(ITIMER_REAL, &itimerval, 0); if (!seconds) return; debug("starting timer for %d seconds", seconds); itimerval.it_value.tv_sec = seconds; /* * SLES11 has high precision timers; too low an interval will cause timer * to fire extremely rapidly after first occurrence. We set it to 10 ms. */ itimerval.it_interval.tv_usec = 10000; setitimer(ITIMER_REAL, &itimerval, 0); } /* * Stop timing. Note that the end time is obtained by the first call to * set_finished. In the tests, when SIGALRM goes off, it may be executing a * system call which gets interrupted. If SIGALRM goes off after Finished is * checked but before the system call is initiated, the system call will be * executed and it will take the second SIGALRM call generated by the interval * timer to wake it up. Hence, we save the end times in sig_alrm. Note that * if Finished is set, we reject any packets that are sent or arrive in order * not to cheat. We clear Finished since code assumes that it is the default * state. */ void stop_test_timer(void) { struct itimerval itimerval = {{0}}; set_finished(); setitimer(ITIMER_REAL, &itimerval, 0); Finished = 0; debug("stopping timer"); } /* * Establish the current test as finished. */ void set_finished(void) { if (Finished++ == 0) get_times(LStat.time_e); } /* * Show results. */ void show_results(MEASURE measure) { calc_results(); show_info(measure); } /* * Calculate results. 
 * Merges the counters each node kept on behalf of the other, derives the
 * per-node timings and then fills in Res: latency, message rate, send and
 * receive bandwidth and the CPU cost of sending and receiving.
 */
static void
calc_results(void)
{
    double no_msgs;
    double locTime;
    double remTime;
    double midTime;
    double gB = 1000 * 1000 * 1000;

    /* Fold in what each side counted for the other. */
    add_ustat(&LStat.s, &RStat.rem_s);
    add_ustat(&LStat.r, &RStat.rem_r);
    add_ustat(&RStat.s, &LStat.rem_s);
    add_ustat(&RStat.r, &LStat.rem_r);

    memset(&Res, 0, sizeof(Res));
    calc_node(&Res.l, &LStat);
    calc_node(&Res.r, &RStat);
    /* Latency is local elapsed time per message received by either side. */
    no_msgs = LStat.r.no_msgs + RStat.r.no_msgs;
    if (no_msgs)
        Res.latency = Res.l.time_real / no_msgs;

    locTime = Res.l.time_real;
    remTime = Res.r.time_real;
    midTime = (locTime + remTime) / 2;

    /* Everything below divides by these times. */
    if (locTime == 0 || remTime == 0)
        return;

    /*
     * Calculate messaging rate.  When both sides received messages the two
     * counts are added and divided by the average of the two elapsed times.
     */
    if (!RStat.r.no_msgs)
        Res.msg_rate = LStat.r.no_msgs / remTime;
    else if (!LStat.r.no_msgs)
        Res.msg_rate = RStat.r.no_msgs / locTime;
    else
        Res.msg_rate = (LStat.r.no_msgs + RStat.r.no_msgs) / midTime;

    /* Calculate send bandwidth */
    if (!RStat.s.no_bytes)
        Res.send_bw = LStat.s.no_bytes / locTime;
    else if (!LStat.s.no_bytes)
        Res.send_bw = RStat.s.no_bytes / remTime;
    else
        Res.send_bw = (LStat.s.no_bytes + RStat.s.no_bytes) / midTime;

    /* Calculate receive bandwidth. */
    if (!RStat.r.no_bytes)
        Res.recv_bw = LStat.r.no_bytes / locTime;
    else if (!LStat.r.no_bytes)
        Res.recv_bw = RStat.r.no_bytes / remTime;
    else
        Res.recv_bw = (LStat.r.no_bytes + RStat.r.no_bytes) / midTime;

    /*
     * Calculate costs.  A cost is CPU time scaled by gB (1e9) per byte, and
     * is only computed when a node did nothing but send, or nothing but
     * receive, so that all of its CPU time can be charged to that direction.
     */
    if (LStat.s.no_bytes && !LStat.r.no_bytes && !RStat.s.no_bytes)
        Res.send_cost = Res.l.time_cpu*gB / LStat.s.no_bytes;
    else if (RStat.s.no_bytes && !RStat.r.no_bytes && !LStat.s.no_bytes)
        Res.send_cost = Res.r.time_cpu*gB / RStat.s.no_bytes;
    if (RStat.r.no_bytes && !RStat.s.no_bytes && !LStat.r.no_bytes)
        Res.recv_cost = Res.r.time_cpu*gB / RStat.r.no_bytes;
    else if (LStat.r.no_bytes && !LStat.s.no_bytes && !RStat.r.no_bytes)
        Res.recv_cost = Res.l.time_cpu*gB / LStat.r.no_bytes;
}


/*
 * Determine the number of packets left to send.
*/ int left_to_send(long *sentp, int room) { int n; if (!Req.no_msgs) return room; n = Req.no_msgs - *sentp; if (n <= 0) return 0; if (n > room) return room; return n; } /* * Combine statistics that the remote node kept track of with those that the * local node kept. */ static void add_ustat(USTAT *l, USTAT *r) { l->no_bytes += r->no_bytes; l->no_msgs += r->no_msgs; l->no_errs += r->no_errs; } /* * Calculate time values for a node. */ static void calc_node(RESN *resn, STAT *stat) { int i; CLOCK cpu; double s = stat->time_e[T_REAL] - stat->time_s[T_REAL]; memset(resn, 0, sizeof(*resn)); if (s == 0) return; if (stat->no_ticks == 0) return; resn->time_real = s / stat->no_ticks; cpu = 0; for (i = 0; i < T_N; ++i) if (i != T_REAL && i != T_IDLE) cpu += stat->time_e[i] - stat->time_s[i]; resn->time_cpu = (float) cpu / stat->no_ticks; resn->cpu_user = (stat->time_e[T_USER] - stat->time_s[T_USER] + stat->time_e[T_NICE] - stat->time_s[T_NICE]) / s; resn->cpu_intr = (stat->time_e[T_IRQ] - stat->time_s[T_IRQ] + stat->time_e[T_SOFTIRQ] - stat->time_s[T_SOFTIRQ]) / s; resn->cpu_idle = (stat->time_e[T_IDLE] - stat->time_s[T_IDLE]) / s; resn->cpu_kernel = (stat->time_e[T_KERNEL] - stat->time_s[T_KERNEL] + stat->time_e[T_STEAL] - stat->time_s[T_STEAL]) / s; resn->cpu_io_wait = (stat->time_e[T_IOWAIT] - stat->time_s[T_IOWAIT]) / s; resn->cpu_total = resn->cpu_user + resn->cpu_intr + resn->cpu_kernel + resn->cpu_io_wait; } /* * Show relevant values. 
*/ static void show_info(MEASURE measure) { if (measure == LATENCY) { view_time('a', "", "latency", Res.latency); view_rate('s', "", "msg_rate", Res.msg_rate); } else if (measure == MSG_RATE) { view_rate('a', "", "msg_rate", Res.msg_rate); } else if (measure == BANDWIDTH) { view_band('a', "", "bw", Res.recv_bw); view_rate('s', "", "msg_rate", Res.msg_rate); } else if (measure == BANDWIDTH_SR) { view_band('a', "", "send_bw", Res.send_bw); view_band('a', "", "recv_bw", Res.recv_bw); view_rate('s', "", "msg_rate", Res.msg_rate); } show_used(); view_cost('t', "", "send_cost", Res.send_cost); view_cost('t', "", "recv_cost", Res.recv_cost); show_rest(); if (Debug) show_debug(); } /* * Show parameters the user set. */ static void show_used(void) { PAR_NAME *p; PAR_NAME *q = endof(ParName); if (!VerboseUsed) return; for (p = ParName; p < q; ++p) { PAR_INFO *l = par_info(p->loc_i); PAR_INFO *r = par_info(p->rem_i); if (!l->inuse && !r->inuse) continue; if (VerboseUsed < 2 && !l->set & !r->set) continue; if (l->type == 'l') { uint32_t lv = *(uint32_t *)l->ptr; uint32_t rv = *(uint32_t *)r->ptr; if (lv == rv) view_long('u', "", p->name, lv); else { view_long('u', "loc_", p->name, lv); view_long('u', "rem_", p->name, rv); } } else if (l->type == 'p') { if (streq(l->ptr, r->ptr)) view_strn('u', "", p->name, l->ptr); else { view_strn('u', "loc_", p->name, l->ptr); view_strn('u', "rem_", p->name, r->ptr); } } else if (l->type == 's') { uint32_t lv = *(uint32_t *)l->ptr; uint32_t rv = *(uint32_t *)r->ptr; if (lv == rv) view_size('u', "", p->name, lv); else { view_size('u', "loc_", p->name, lv); view_size('u', "rem_", p->name, rv); } } else if (l->type == 't') { uint32_t lv = *(uint32_t *)l->ptr; uint32_t rv = *(uint32_t *)r->ptr; if (lv == rv) view_time('u', "", p->name, lv); else { view_time('u', "loc_", p->name, lv); view_time('u', "rem_", p->name, rv); } } } } /* * Show the remaining parameters. 
*/
/*
 * When UnifyNodes is off and the byte counters show that one node only sent
 * while the other only received, the values are labelled by role ("send_" /
 * "recv_").  In every other case they are labelled by node ("loc_" /
 * "rem_").
 */
static void
show_rest(void)
{
    RESN *sendRes  = 0;
    RESN *recvRes  = 0;
    STAT *sendStat = 0;
    STAT *recvStat = 0;

    if (!UnifyNodes) {
        uint64_t locSent = LStat.s.no_bytes;
        uint64_t locRcvd = LStat.r.no_bytes;
        uint64_t remSent = RStat.s.no_bytes;
        uint64_t remRcvd = RStat.r.no_bytes;

        if (locSent && remRcvd && !remSent && !locRcvd) {
            /* Local node is the sender, remote node the receiver. */
            sendRes  = &Res.l;
            recvRes  = &Res.r;
            sendStat = &LStat;
            recvStat = &RStat;
        } else if (remSent && locRcvd && !locSent && !remRcvd) {
            /* Remote node is the sender, local node the receiver. */
            sendRes  = &Res.r;
            recvRes  = &Res.l;
            sendStat = &RStat;
            recvStat = &LStat;
        }
    }

    if (!sendRes) {
        /* No clear sender/receiver split: report per node. */
        view_cpus('t', "", "loc_cpus_used",    Res.l.cpu_total);
        view_cpus('T', "", "loc_cpus_user",    Res.l.cpu_user);
        view_cpus('T', "", "loc_cpus_intr",    Res.l.cpu_intr);
        view_cpus('T', "", "loc_cpus_kernel",  Res.l.cpu_kernel);
        view_cpus('T', "", "loc_cpus_iowait",  Res.l.cpu_io_wait);
        view_time('T', "", "loc_real_time",    Res.l.time_real);
        view_time('T', "", "loc_cpu_time",     Res.l.time_cpu);
        view_long('S', "", "loc_send_errors",  LStat.s.no_errs);
        view_long('S', "", "loc_recv_errors",  LStat.r.no_errs);
        view_size('S', "", "loc_send_bytes",   LStat.s.no_bytes);
        view_size('S', "", "loc_recv_bytes",   LStat.r.no_bytes);
        view_long('S', "", "loc_send_msgs",    LStat.s.no_msgs);
        view_long('S', "", "loc_recv_msgs",    LStat.r.no_msgs);
        view_long('S', "", "loc_max_cqe",      LStat.max_cqes);

        view_cpus('t', "", "rem_cpus_used",    Res.r.cpu_total);
        view_cpus('T', "", "rem_cpus_user",    Res.r.cpu_user);
        view_cpus('T', "", "rem_cpus_intr",    Res.r.cpu_intr);
        view_cpus('T', "", "rem_cpus_kernel",  Res.r.cpu_kernel);
        view_cpus('T', "", "rem_cpus_iowait",  Res.r.cpu_io_wait);
        view_time('T', "", "rem_real_time",    Res.r.time_real);
        view_time('T', "", "rem_cpu_time",     Res.r.time_cpu);
        view_long('S', "", "rem_send_errors",  RStat.s.no_errs);
        view_long('S', "", "rem_recv_errors",  RStat.r.no_errs);
        view_size('S', "", "rem_send_bytes",   RStat.s.no_bytes);
        view_size('S', "", "rem_recv_bytes",   RStat.r.no_bytes);
        view_long('S', "", "rem_send_msgs",    RStat.s.no_msgs);
        view_long('S', "", "rem_recv_msgs",    RStat.r.no_msgs);
        view_long('S', "", "rem_max_cqe",      RStat.max_cqes);
        return;
    }

    /* Sender side */
    view_cpus('t', "", "send_cpus_used",   sendRes->cpu_total);
    view_cpus('T', "", "send_cpus_user",   sendRes->cpu_user);
    view_cpus('T', "", "send_cpus_intr",   sendRes->cpu_intr);
    view_cpus('T', "", "send_cpus_kernel", sendRes->cpu_kernel);
    view_cpus('T', "", "send_cpus_iowait", sendRes->cpu_io_wait);
    view_time('T', "", "send_real_time",   sendRes->time_real);
    view_time('T', "", "send_cpu_time",    sendRes->time_cpu);
    view_long('S', "", "send_errors",      sendStat->s.no_errs);
    view_size('S', "", "send_bytes",       sendStat->s.no_bytes);
    view_long('S', "", "send_msgs",        sendStat->s.no_msgs);
    view_long('S', "", "send_max_cqe",     sendStat->max_cqes);

    /* Receiver side */
    view_cpus('t', "", "recv_cpus_used",   recvRes->cpu_total);
    view_cpus('T', "", "recv_cpus_user",   recvRes->cpu_user);
    view_cpus('T', "", "recv_cpus_intr",   recvRes->cpu_intr);
    view_cpus('T', "", "recv_cpus_kernel", recvRes->cpu_kernel);
    view_cpus('T', "", "recv_cpus_iowait", recvRes->cpu_io_wait);
    view_time('T', "", "recv_real_time",   recvRes->time_real);
    view_time('T', "", "recv_cpu_time",    recvRes->time_cpu);
    view_long('S', "", "recv_errors",      recvStat->r.no_errs);
    view_size('S', "", "recv_bytes",       recvStat->r.no_bytes);
    view_long('S', "", "recv_msgs",        recvStat->r.no_msgs);
    view_long('S', "", "recv_max_cqe",     recvStat->max_cqes);
}


/*
 * Show all values.
*/ static void show_debug(void) { /* Local node */ view_long('d', "", "l_no_cpus", LStat.no_cpus); view_long('d', "", "l_no_ticks", LStat.no_ticks); view_long('d', "", "l_max_cqes", LStat.max_cqes); if (LStat.no_ticks) { double t = LStat.no_ticks; CLOCK *s = LStat.time_s; CLOCK *e = LStat.time_e; double real = (e[T_REAL] - s[T_REAL]) / t; double user = (e[T_USER] - s[T_USER]) / t; double nice = (e[T_NICE] - s[T_NICE]) / t; double system = (e[T_KERNEL] - s[T_KERNEL]) / t; double idle = (e[T_IDLE] - s[T_IDLE]) / t; double iowait = (e[T_IOWAIT] - s[T_IOWAIT]) / t; double irq = (e[T_IRQ] - s[T_IRQ]) / t; double softirq = (e[T_SOFTIRQ] - s[T_SOFTIRQ]) / t; double steal = (e[T_STEAL] - s[T_STEAL]) / t; view_time('d', "", "l_timer_real", real); view_time('d', "", "l_timer_user", user); view_time('d', "", "l_timer_nice", nice); view_time('d', "", "l_timer_system", system); view_time('d', "", "l_timer_idle", idle); view_time('d', "", "l_timer_iowait", iowait); view_time('d', "", "l_timer_irq", irq); view_time('d', "", "l_timer_softirq", softirq); view_time('d', "", "l_timer_steal", steal); } view_size('d', "", "l_s_no_bytes", LStat.s.no_bytes); view_long('d', "", "l_s_no_msgs", LStat.s.no_msgs); view_long('d', "", "l_s_no_errs", LStat.s.no_errs); view_size('d', "", "l_r_no_bytes", LStat.r.no_bytes); view_long('d', "", "l_r_no_msgs", LStat.r.no_msgs); view_long('d', "", "l_r_no_errs", LStat.r.no_errs); view_size('d', "", "l_rem_s_no_bytes", LStat.rem_s.no_bytes); view_long('d', "", "l_rem_s_no_msgs", LStat.rem_s.no_msgs); view_long('d', "", "l_rem_s_no_errs", LStat.rem_s.no_errs); view_size('d', "", "l_rem_r_no_bytes", LStat.rem_r.no_bytes); view_long('d', "", "l_rem_r_no_msgs", LStat.rem_r.no_msgs); view_long('d', "", "l_rem_r_no_errs", LStat.rem_r.no_errs); /* Remote node */ view_long('d', "", "r_no_cpus", RStat.no_cpus); view_long('d', "", "r_no_ticks", RStat.no_ticks); view_long('d', "", "r_max_cqes", RStat.max_cqes); if (RStat.no_ticks) { double t = RStat.no_ticks; 
CLOCK *s = RStat.time_s; CLOCK *e = RStat.time_e; double real = (e[T_REAL] - s[T_REAL]) / t; double user = (e[T_USER] - s[T_USER]) / t; double nice = (e[T_NICE] - s[T_NICE]) / t; double system = (e[T_KERNEL] - s[T_KERNEL]) / t; double idle = (e[T_IDLE] - s[T_IDLE]) / t; double iowait = (e[T_IOWAIT] - s[T_IOWAIT]) / t; double irq = (e[T_IRQ] - s[T_IRQ]) / t; double softirq = (e[T_SOFTIRQ] - s[T_SOFTIRQ]) / t; double steal = (e[T_STEAL] - s[T_STEAL]) / t; view_time('d', "", "r_timer_real", real); view_time('d', "", "r_timer_user", user); view_time('d', "", "r_timer_nice", nice); view_time('d', "", "r_timer_system", system); view_time('d', "", "r_timer_idle", idle); view_time('d', "", "r_timer_iowait", iowait); view_time('d', "", "r_timer_irq", irq); view_time('d', "", "r_timer_softirq", softirq); view_time('d', "", "r_timer_steal", steal); } view_size('d', "", "r_s_no_bytes", RStat.s.no_bytes); view_long('d', "", "r_s_no_msgs", RStat.s.no_msgs); view_long('d', "", "r_s_no_errs", RStat.s.no_errs); view_size('d', "", "r_r_no_bytes", RStat.r.no_bytes); view_long('d', "", "r_r_no_msgs", RStat.r.no_msgs); view_long('d', "", "r_r_no_errs", RStat.r.no_errs); view_size('d', "", "r_rem_s_no_bytes", RStat.rem_s.no_bytes); view_long('d', "", "r_rem_s_no_msgs", RStat.rem_s.no_msgs); view_long('d', "", "r_rem_s_no_errs", RStat.rem_s.no_errs); view_size('d', "", "r_rem_r_no_bytes", RStat.rem_r.no_bytes); view_long('d', "", "r_rem_r_no_msgs", RStat.rem_r.no_msgs); view_long('d', "", "r_rem_r_no_errs", RStat.rem_r.no_errs); } /* * Show a cost in terms of seconds per gigabyte. */ static void view_cost(int type, char *pref, char *name, double value) { int n = 0; char *tab[] ={ "ns/GB", "us/GB", "ms/GB", "sec/GB" }; value *= 1E9; if (!verbose(type, value)) return; if (!UnifyUnits) { while (value >= 1000 && n < (int)cardof(tab)-1) { value /= 1000; ++n; } } place_val(pref, name, tab[n], value); } /* * Show the number of cpus. 
*/ static void view_cpus(int type, char *pref, char *name, double value) { value *= 100; if (!verbose(type, value)) return; place_val(pref, name, "% cpus", value); } /* * Show a messaging rate. */ static void view_rate(int type, char *pref, char *name, double value) { int n = 0; char *tab[] ={ "/sec", "K/sec", "M/sec", "G/sec", "T/sec" }; if (!verbose(type, value)) return; if (!UnifyUnits) { while (value >= 1000 && n < (int)cardof(tab)-1) { value /= 1000; ++n; } } place_val(pref, name, tab[n], value); } /* * Show a number. */ static void view_long(int type, char *pref, char *name, long long value) { int n = 0; double val = value; char *tab[] ={ "", "thousand", "million", "billion", "trillion" }; if (!verbose(type, val)) return; if (!UnifyUnits && val >= 1000*1000) { while (val >= 1000 && n < (int)cardof(tab)-1) { val /= 1000; ++n; } } place_val(pref, name, tab[n], val); } /* * Show a bandwidth value. */ static void view_band(int type, char *pref, char *name, double value) { int n, s; char **tab; if (!verbose(type, value)) return; if (UseBitsPerSec) { static char *t[] ={ "bits/sec", "Kb/sec", "Mb/sec", "Gb/sec", "Tb/sec" }; s = cardof(t); tab = t; value *= 8; } else { static char *t[] ={ "bytes/sec", "KB/sec", "MB/sec", "GB/sec", "TB/sec" }; s = cardof(t); tab = t; } n = 0; if (!UnifyUnits) { while (value >= 1000 && n < s-1) { value /= 1000; ++n; } } place_val(pref, name, tab[n], value); } /* * Show a size. */ static void view_size(int type, char *pref, char *name, long long value) { int n = 0; double val = value; char *tab[] ={ "bytes", "KB", "MB", "GB", "TB" }; if (!verbose(type, val)) return; if (!UnifyUnits) { if (nice_1024(pref, name, value)) return; while (val >= 1000 && n < (int)cardof(tab)-1) { val /= 1000; ++n; } } place_val(pref, name, tab[n], val); } /* * Show a number if it can be expressed as a nice multiple of a power of 1024. 
*/ static int nice_1024(char *pref, char *name, long long value) { char *data; char *altn; int n = 0; long long val = value; char *tab[] ={ "KiB", "MiB", "GiB", "TiB" }; if (val < 1024 || val % 1024) return 0; val /= 1024; while (val >= 1024 && n < (int)cardof(tab)-1) { if (val % 1024) return 0; val /= 1024; ++n; } data = qasprintf("%lld", val); altn = qasprintf("%lld", value); place_any(pref, name, tab[n], commify(data), commify(altn)); return 1; } /* * Show a string. */ static void view_strn(int type, char *pref, char *name, char *value) { if (!verbose(type, value[0] != '\0')) return; place_any(pref, name, 0, strdup(value), 0); } /* * Show a time. */ static void view_time(int type, char *pref, char *name, double value) { int n = 0; char *tab[] ={ "ns", "us", "ms", "sec" }; value *= 1E9; if (!verbose(type, value)) return; if (!UnifyUnits) { while (value >= 1000 && n < (int)cardof(tab)-1) { value /= 1000; ++n; } } place_val(pref, name, tab[n], value); } /* * Determine if we are verbose enough to show a value. */ static int verbose(int type, double value) { if (type == 'a') return 1; if (value <= 0) return 0; switch (type) { case 'd': return Debug; case 'c': return VerboseConf >= 1; case 's': return VerboseStat >= 1; case 't': return VerboseTime >= 1; case 'u': return VerboseUsed >= 1; case 'C': return VerboseConf >= 2; case 'S': return VerboseStat >= 2; case 'T': return VerboseTime >= 2; case 'U': return VerboseUsed >= 2; default: error(BUG, "verbose: bad type: %c (%o)", type, type); } return 0; } /* * Place a value to be shown later. 
*/ static void place_val(char *pref, char *name, char *unit, double value) { char *data = qasprintf("%.0f", value); char *p = data; int n = Precision; if (*p == '-') ++p; while (isdigit(*p++)) --n; if (n > 0) { free(data); data = qasprintf("%.*f", n, value); p = &data[strlen(data)]; while (p > data && *--p == '0') ; if (p > data && *p == '.') --p; p[1] = '\0'; } place_any(pref, name, unit, commify(data), 0); } /* * Place an entry in our show table. */ static void place_any(char *pref, char *name, char *unit, char *data, char *altn) { SHOW *show = &ShowTable[ShowIndex++]; if (ShowIndex > cardof(ShowTable)) error(BUG, "need to increase size of ShowTable"); show->pref = pref; show->name = name; show->unit = unit; show->data = data; show->altn = altn; } /* * Show all saved values. */ static void place_show(void) { int i; int nameLen = 0; int dataLen = 0; int unitLen = 0; /* First compute formating sizes */ for (i = 0; i < ShowIndex; ++i) { int n; SHOW *show = &ShowTable[i]; n = (show->pref ? strlen(show->pref) : 0) + strlen(show->name); if (n > nameLen) nameLen = n; n = strlen(show->data); if (show->unit) { if (n > dataLen) dataLen = n; n = strlen(show->unit); if (n > unitLen) unitLen = n; } } /* Then display results */ for (i = 0; i < ShowIndex; ++i) { int n = 0; SHOW *show = &ShowTable[i]; printf(" "); if (show->pref) { n = strlen(show->pref); printf("%s", show->pref); } printf("%-*s", nameLen-n, show->name); if (show->unit) { printf(" = %*s", dataLen, show->data); printf(" %s", show->unit); } else printf(" = %s", show->data); if (show->altn) printf(" (%s)", show->altn); printf("\n"); free(show->data); free(show->altn); } ShowIndex = 0; } /* * Set the processor affinity. */ static void set_affinity(void) { cpu_set_t set; int a = Req.affinity; if (!a) return; CPU_ZERO(&set); CPU_SET(a-1, &set); if (sched_setaffinity(0, sizeof(set), &set) < 0) error(SYS, "cannot set processor affinity (cpu %d)", a-1); } /* * Encode a REQ structure into a data stream. 
*/
/*
 * The members are written in a fixed order which dec_req_version and
 * dec_req_data read back in the same order.
 */
static void
enc_req(REQ *host)
{
#define ENC_FIELD(f) enc_int(host->f, sizeof(host->f))
#define ENC_TEXT(f)  enc_str(host->f, sizeof(host->f))

    /* Version part */
    ENC_FIELD(ver_maj);
    ENC_FIELD(ver_min);
    ENC_FIELD(ver_inc);

    /* Data part */
    ENC_FIELD(req_index);
    ENC_FIELD(access_recv);
    ENC_FIELD(affinity);
    ENC_FIELD(alt_port);
    ENC_FIELD(flip);
    ENC_FIELD(msg_size);
    ENC_FIELD(mtu_size);
    ENC_FIELD(no_msgs);
    ENC_FIELD(poll_mode);
    ENC_FIELD(port);
    ENC_FIELD(rd_atomic);
    ENC_FIELD(sl);
    ENC_FIELD(sock_buf_size);
    ENC_FIELD(src_path_bits);
    ENC_FIELD(time);
    ENC_FIELD(timeout);
    ENC_FIELD(use_cm);
    ENC_TEXT(id);
    ENC_TEXT(static_rate);

#undef ENC_TEXT
#undef ENC_FIELD
}


/*
 * Decode the version part of a REQ structure from a data stream.  To decode
 * the entire REQ structure, call dec_req_version and dec_req_data in
 * succession.
 */
static void
dec_req_version(REQ *host)
{
#define DEC_FIELD(f) host->f = dec_int(sizeof(host->f))

    DEC_FIELD(ver_maj);
    DEC_FIELD(ver_min);
    DEC_FIELD(ver_inc);

#undef DEC_FIELD
}


/*
 * Decode the data part of a REQ structure from a data stream.
*/
/*
 * Reads every member after the three version numbers, in the order in which
 * the sender encoded them.
 */
static void
dec_req_data(REQ *host)
{
#define DEC_FIELD(f) host->f = dec_int(sizeof(host->f))
#define DEC_TEXT(f)  dec_str(host->f, sizeof(host->f))

    DEC_FIELD(req_index);
    DEC_FIELD(access_recv);
    DEC_FIELD(affinity);
    DEC_FIELD(alt_port);
    DEC_FIELD(flip);
    DEC_FIELD(msg_size);
    DEC_FIELD(mtu_size);
    DEC_FIELD(no_msgs);
    DEC_FIELD(poll_mode);
    DEC_FIELD(port);
    DEC_FIELD(rd_atomic);
    DEC_FIELD(sl);
    DEC_FIELD(sock_buf_size);
    DEC_FIELD(src_path_bits);
    DEC_FIELD(time);
    DEC_FIELD(timeout);
    DEC_FIELD(use_cm);
    DEC_TEXT(id);
    DEC_TEXT(static_rate);

#undef DEC_TEXT
#undef DEC_FIELD
}


/*
 * Encode a STAT structure into a data stream: the three scalar members, all
 * start times, all end times and then the four USTAT groups.
 */
static void
enc_stat(STAT *host)
{
    int t;

    enc_int(host->no_cpus,  sizeof(host->no_cpus));
    enc_int(host->no_ticks, sizeof(host->no_ticks));
    enc_int(host->max_cqes, sizeof(host->max_cqes));

    t = 0;
    while (t < T_N) {
        enc_int(host->time_s[t], sizeof(host->time_s[t]));
        ++t;
    }
    t = 0;
    while (t < T_N) {
        enc_int(host->time_e[t], sizeof(host->time_e[t]));
        ++t;
    }

    enc_ustat(&host->s);
    enc_ustat(&host->r);
    enc_ustat(&host->rem_s);
    enc_ustat(&host->rem_r);
}


/*
 * Decode a STAT structure from a data stream.
*/ static void dec_stat(STAT *host) { int i; host->no_cpus = dec_int(sizeof(host->no_cpus)); host->no_ticks = dec_int(sizeof(host->no_ticks)); host->max_cqes = dec_int(sizeof(host->max_cqes)); for (i = 0; i < T_N; ++i) host->time_s[i] = dec_int(sizeof(host->time_s[i])); for (i = 0; i < T_N; ++i) host->time_e[i] = dec_int(sizeof(host->time_e[i])); dec_ustat(&host->s); dec_ustat(&host->r); dec_ustat(&host->rem_s); dec_ustat(&host->rem_r); } /* * Encode a USTAT structure into a data stream. */ static void enc_ustat(USTAT *host) { enc_int(host->no_bytes, sizeof(host->no_bytes)); enc_int(host->no_msgs, sizeof(host->no_msgs)); enc_int(host->no_errs, sizeof(host->no_errs)); } /* * Decode a USTAT structure from a data stream. */ static void dec_ustat(USTAT *host) { host->no_bytes = dec_int(sizeof(host->no_bytes)); host->no_msgs = dec_int(sizeof(host->no_msgs)); host->no_errs = dec_int(sizeof(host->no_errs)); } /* * Get various temporal parameters. */ static void get_times(CLOCK timex[T_N]) { int n; char *p; char buf[BUFSIZE]; struct tms tms; timex[0] = times(&tms); if (lseek(ProcStatFD, 0, 0) < 0) error(SYS, "failed to seek /proc/stat"); n = read(ProcStatFD, buf, sizeof(buf)-1); buf[n] = '\0'; if (strncmp(buf, "cpu ", 4)) error(0, "/proc/stat does not start with 'cpu '"); p = &buf[3]; for (n = 1; n < T_N; ++n) { while (*p == ' ') ++p; if (!isdigit(*p)) { if (*p != '\n' || n < T_N-1) error(0, "/proc/stat has bad format"); break; } timex[n] = strtoll(p, 0, 10); while (*p != ' ' && *p != '\n' && *p != '\0') ++p; } while (n < T_N) timex[n++] = 0; } /* * Insert commas within a number for readability. 
*/
/*
 * data must be a heap allocated string (or null); it may be reallocated, so
 * the caller must use the returned pointer from then on.  Commas are placed
 * between groups of three digits in the integer part only, i.e. the run of
 * digits that ends at the decimal point or, if there is none, at the end of
 * the string.  Nothing is changed when UnifyUnits is set.
 */
static char *
commify(char *data)
{
    int s;
    int d;
    int seqS;
    int seqE;
    int dataLen;
    int noCommas;
    char *grown;

    if (!data)
        return data;
    if (UnifyUnits)
        return data;
    dataLen = strlen(data);

    /* Find the trailing run of digits... */
    seqS = seqE = dataLen;
    while (--seqS >= 0)
        if (!isdigit((unsigned char) data[seqS]))
            break;
    /* ...and if it was the fraction, step over it to the integer part. */
    if (seqS >= 0 && data[seqS] == '.') {
        seqE = seqS;
        while (--seqS >= 0)
            if (!isdigit((unsigned char) data[seqS]))
                break;
    }

    /* seqS..seqE now index the first and last digit of the integer part. */
    noCommas = (--seqE - ++seqS) / 3;
    if (noCommas == 0)
        return data;

    /* Keep the old pointer until the reallocation is known to have worked. */
    grown = realloc(data, dataLen+noCommas+1);
    if (!grown) {
        error(0, "out of space");
        return data;
    }
    data = grown;

    /* Shift characters right, from the end, dropping in the commas. */
    s = dataLen;
    d = dataLen + noCommas;
    for (;;) {
        int n;

        data[d--] = data[s--];
        n = seqE - s;
        if (n > 0 && n%3 == 0) {
            data[d--] = ',';
            if (--noCommas == 0)
                break;
        }
    }
    return data;
}


/*
 * Like strncpy but ensures the destination is null terminated.  n is the
 * size of the destination; nothing is written if it is not positive.
 */
static void
strncopy(char *d, char *s, int n)
{
    if (n <= 0)
        return;
    strncpy(d, s, n);
    d[n-1] = '\0';
}
qperf-0.4.10/src/qperf.h000066400000000000000000000265241313370502100147720ustar00rootroot00000000000000
/*
 * qperf - general header file.
 *
 * Copyright (c) 2002-2009 Johann George.  All rights reserved.
 * Copyright (c) 2006-2009 QLogic Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include /* * Parameters. */ #define STRSIZE 64 /* * For convenience and readability. */ #define SUCCESS0 0 #define cardof(a) (sizeof(a)/sizeof(*a)) #define endof(a) (&a[cardof(a)]) #define streq(a, b) (strcmp(a, b) == 0) #define offset(t, e) ((long)&((t *)0)->e) #define is_client() (ServerName != 0) #define is_sender() (Req.flip ? !is_client() : is_client()) /* * Type definitions. */ typedef uint64_t CLOCK; typedef struct addrinfo AI; typedef struct sockaddr SA; typedef struct sockaddr_storage SS; /* * Error actions. */ #define BUG 1 /* Internal error */ #define SYS 2 /* System error */ #define RET 4 /* Return, don't exit */ /* * Time indices. */ typedef enum { T_REAL, T_USER, T_NICE, T_KERNEL, T_IDLE, T_IOWAIT, T_IRQ, T_SOFTIRQ, T_STEAL, T_N } TIME_INDEX; /* * Parameter indices. P_NULL must be 0. */ typedef enum { P_NULL, L_ACCESS_RECV, R_ACCESS_RECV, L_AFFINITY, R_AFFINITY, L_ALT_PORT, R_ALT_PORT, L_FLIP, R_FLIP, L_ID, R_ID, L_MSG_SIZE, R_MSG_SIZE, L_MTU_SIZE, R_MTU_SIZE, L_NO_MSGS, R_NO_MSGS, L_POLL_MODE, R_POLL_MODE, L_PORT, R_PORT, L_RD_ATOMIC, R_RD_ATOMIC, L_SL, R_SL, L_SOCK_BUF_SIZE, R_SOCK_BUF_SIZE, L_SRC_PATH_BITS, R_SRC_PATH_BITS, L_STATIC_RATE, R_STATIC_RATE, L_TIME, R_TIME, L_TIMEOUT, R_TIMEOUT, L_USE_CM, R_USE_CM, P_N } PAR_INDEX; /* * What we are measuring. */ typedef enum { LATENCY, MSG_RATE, BANDWIDTH, BANDWIDTH_SR } MEASURE; /* * Request to the server. Note that most of these must be of type uint32_t * because of the way options are set. 
The minor version must be changed if
 * there is a change to this data structure.  Do not move or change the first
 * four elements.
 */
typedef struct REQ {
    uint16_t    ver_maj;                /* Major version */
    uint16_t    ver_min;                /* Minor version */
    uint16_t    ver_inc;                /* Incremental version */
    uint16_t    req_index;              /* Request index (into Tests) */
    uint32_t    access_recv;            /* Access data after receiving */
    uint32_t    affinity;               /* Processor affinity */
    uint32_t    alt_port;               /* Alternate path port number */
    uint32_t    flip;                   /* Flip sender/receiver */
    uint32_t    msg_size;               /* Message Size */
    uint32_t    mtu_size;               /* MTU Size */
    uint32_t    no_msgs;                /* Number of messages */
    uint32_t    poll_mode;              /* Poll mode */
    uint32_t    port;                   /* Port number requested */
    uint32_t    rd_atomic;              /* Number of pending RDMA or atomics */
    uint32_t    sl;                     /* Service level */
    uint32_t    sock_buf_size;          /* Socket buffer size */
    uint32_t    src_path_bits;          /* Source path bits */
    uint32_t    time;                   /* Duration in seconds */
    uint32_t    timeout;                /* Timeout for messages */
    uint32_t    use_cm;                 /* Use Connection Manager */
    char        id[STRSIZE];            /* Identifier */
    char        static_rate[STRSIZE];   /* Static rate */
} REQ;


/*
 * Transfer statistics.
 */
typedef struct USTAT {
    uint64_t    no_bytes;               /* Number of bytes transferred */
    uint64_t    no_msgs;                /* Number of messages */
    uint64_t    no_errs;                /* Number of errors */
} USTAT;


/*
 * Statistics.  The start and end time arrays are indexed by TIME_INDEX.
 */
typedef struct STAT {
    uint32_t    no_cpus;                /* Number of processors */
    uint32_t    no_ticks;               /* Ticks per second */
    uint32_t    max_cqes;               /* Maximum CQ entries */
    CLOCK       time_s[T_N];            /* Start times */
    CLOCK       time_e[T_N];            /* End times */
    USTAT       s;                      /* Send statistics */
    USTAT       r;                      /* Receive statistics */
    USTAT       rem_s;                  /* Remote send statistics */
    USTAT       rem_r;                  /* Remote receive statistics */
} STAT;


/*
 * Results per node.
*/
typedef struct RESN {
    double      time_real;              /* Real (elapsed) time in seconds */
    double      time_cpu;               /* Cpu time in seconds */
    double      cpu_total;              /* Cpu time (as a fraction of a cpu) */
    double      cpu_user;               /* User time (fraction of cpu) */
    double      cpu_intr;               /* Interrupt time (fraction of cpu) */
    double      cpu_idle;               /* Idle time (fraction of cpu) */
    double      cpu_kernel;             /* Kernel time (fraction of cpu) */
    double      cpu_io_wait;            /* IO wait time (fraction of cpu) */
} RESN;


/*
 * Results.
 */
typedef struct RES {
    RESN        l;                      /* Local information */
    RESN        r;                      /* Remote information */
    double      send_bw;                /* Send bandwidth */
    double      recv_bw;                /* Receive bandwidth */
    double      msg_rate;               /* Messaging rate */
    double      send_cost;              /* Send cost */
    double      recv_cost;              /* Receive cost */
    double      latency;                /* Latency */
} RES;


/*
 * Function prototypes in qperf.c.
 */
void        client_send_request(void);
void        exchange_results(void);
int         left_to_send(long *sentp, int room);
void        opt_check(void);
void        par_use(PAR_INDEX index);
int         recv_mesg(void *ptr, int len, char *item);
int         send_mesg(void *ptr, int len, char *item);
void        set_finished(void);
void        setp_u32(char *name, PAR_INDEX index, uint32_t l);
void        setp_str(char *name, PAR_INDEX index, char *s);
void        setv_u32(PAR_INDEX index, uint32_t l);
void        show_results(MEASURE measure);
void        stop_test_timer(void);
void        sync_test(void);


/*
 * Function prototypes in support.c.
*/ void check_remote_error(void); void debug(char *fmt, ...); void dec_init(void *p); int64_t dec_int(int n); void dec_str(char *s, int n); uint32_t decode_uint32(uint32_t *p); void die(void); void enc_init(void *p); void enc_int(int64_t l, int n); void enc_str(char *s, int n); void encode_uint32(uint32_t *p, uint32_t v); int error(int actions, char *fmt, ...); AI *getaddrinfo_port(char *node, int port, AI *hints); char *qasprintf(char *fmt, ...); void *qmalloc(long n); void recv_sync(char *msg); void send_sync(char *msg); void setsockopt_one(int fd, int optname); void synchronize(char *msg); void touch_data(void *p, int n); void urgent(void); /* * Socket tests in socket.c. */ void run_client_rds_bw(void); void run_server_rds_bw(void); void run_client_rds_lat(void); void run_server_rds_lat(void); void run_client_sctp_bw(void); void run_server_sctp_bw(void); void run_client_sctp_lat(void); void run_server_sctp_lat(void); void run_client_sdp_bw(void); void run_server_sdp_bw(void); void run_client_sdp_lat(void); void run_server_sdp_lat(void); void run_client_tcp_bw(void); void run_server_tcp_bw(void); void run_client_tcp_lat(void); void run_server_tcp_lat(void); void run_client_udp_bw(void); void run_server_udp_bw(void); void run_client_udp_lat(void); void run_server_udp_lat(void); /* * RDMA tests in rdma.c. 
*/ void run_client_bug(void); void run_server_bug(void); void run_client_rc_bi_bw(void); void run_server_rc_bi_bw(void); void run_client_rc_bw(void); void run_server_rc_bw(void); void run_client_rc_compare_swap_mr(void); void run_server_rc_compare_swap_mr(void); void run_client_rc_fetch_add_mr(void); void run_server_rc_fetch_add_mr(void); void run_client_rc_lat(void); void run_server_rc_lat(void); void run_client_rc_rdma_read_bw(void); void run_server_rc_rdma_read_bw(void); void run_client_rc_rdma_read_lat(void); void run_server_rc_rdma_read_lat(void); void run_client_rc_rdma_write_bw(void); void run_server_rc_rdma_write_bw(void); void run_client_rc_rdma_write_lat(void); void run_server_rc_rdma_write_lat(void); void run_client_rc_rdma_write_poll_lat(void); void run_server_rc_rdma_write_poll_lat(void); void run_client_uc_bi_bw(void); void run_server_uc_bi_bw(void); void run_client_uc_bw(void); void run_server_uc_bw(void); void run_client_uc_lat(void); void run_server_uc_lat(void); void run_client_uc_rdma_write_bw(void); void run_server_uc_rdma_write_bw(void); void run_client_uc_rdma_write_lat(void); void run_server_uc_rdma_write_lat(void); void run_client_uc_rdma_write_poll_lat(void); void run_server_uc_rdma_write_poll_lat(void); void run_client_ud_bi_bw(void); void run_server_ud_bi_bw(void); void run_client_ud_bw(void); void run_server_ud_bw(void); void run_client_ud_lat(void); void run_server_ud_lat(void); void run_client_ver_rc_compare_swap(void); void run_server_ver_rc_compare_swap(void); void run_client_ver_rc_fetch_add(void); void run_server_ver_rc_fetch_add(void); void run_client_xrc_bi_bw(void); void run_server_xrc_bi_bw(void); void run_client_xrc_bw(void); void run_server_xrc_bw(void); void run_client_xrc_lat(void); void run_server_xrc_lat(void); /* * Variables. 
*/ extern RES Res; extern REQ Req; extern STAT LStat; extern char *Usage[]; extern char *TestName; extern char *ServerName; extern SS ServerAddr; extern int ServerAddrLen; extern int RemoteFD; extern int Debug; extern volatile int Finished; qperf-0.4.10/src/rdma.c000066400000000000000000002025141313370502100145660ustar00rootroot00000000000000/* * qperf - handle RDMA tests. * * Copyright (c) 2002-2009 Johann George. All rights reserved. * Copyright (c) 2006-2009 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include "qperf.h" /* * RDMA parameters. 
 */
#define QKEY                0x11111111  /* Q_Key */
#define NCQE                1024        /* Number of CQ entries */
#define GRH_SIZE            40          /* InfiniBand GRH size */
#define MTU_SIZE            2048        /* Default MTU Size */
#define RETRY_CNT           7           /* RC retry count */
#define RNR_RETRY_CNT       7           /* RC RNR retry count */
#define MIN_RNR_TIMER       12          /* RC Minimum RNR timer */
#define LOCAL_ACK_TIMEOUT   14          /* RC local ACK timeout */


/*
 * Work request IDs.  These are compared against the wr_id field of polled
 * work completions to tell what kind of request completed.
 */
#define WRID_SEND   1                   /* Send */
#define WRID_RECV   2                   /* Receive */
#define WRID_RDMA   3                   /* RDMA */


/*
 * Constants.  Used as the message sizes passed to rd_params by the bandwidth
 * tests.
 */
#define K2      (2*1024)
#define K64     (64*1024)


/*
 * For convenience: short aliases for verbs types.
 */
typedef enum ibv_wr_opcode      ibv_op;
typedef struct ibv_comp_channel ibv_cc;
typedef struct ibv_xrc_domain   ibv_xrc;


/*
 * Atomic operations.
 */
typedef enum ATOMIC {
    COMPARE_SWAP,                       /* Compare and swap */
    FETCH_ADD                           /* Fetch and add */
} ATOMIC;


/*
 * IO Mode.
 */
typedef enum IOMODE {
    IO_SR,                              /* Send/Receive */
    IO_RDMA                             /* RDMA */
} IOMODE;


/*
 * Information specific to a node.  rd_prep sends the local copy to the peer
 * and receives the peer's copy in exchange.
 */
typedef struct NODE {
    uint64_t    vaddr;                  /* Virtual address */
    uint32_t    lid;                    /* Local ID */
    uint32_t    qpn;                    /* Queue pair number */
    uint32_t    psn;                    /* Packet sequence number */
    uint32_t    srqn;                   /* Shared queue number */
    uint32_t    rkey;                   /* Remote key */
    uint32_t    alt_lid;                /* Alternate Path Local LID */
    uint32_t    rd_atomic;              /* Number of read/atomics supported */
} NODE;


/*
 * InfiniBand specific information.
 */
typedef struct IBINFO {
    int                 mtu;            /* MTU */
    int                 port;           /* Port */
    int                 rate;           /* Static rate */
    struct ibv_context *context;        /* Context */
    struct ibv_device **devlist;        /* Device list */
} IBINFO;


/*
 * Connection Manager specific information.
 */
typedef struct CMINFO {
    struct rdma_event_channel *channel; /* Channel */
    struct rdma_cm_id         *id;      /* RDMA id */
    struct rdma_cm_event      *event;   /* Event */
} CMINFO;


/*
 * RDMA device descriptor.
 */
typedef struct DEVICE {
    NODE             lnode;             /* Local node information */
    NODE             rnode;             /* Remote node information */
    IBINFO           ib;                /* InfiniBand information */
    CMINFO           cm;                /* Connection Manager information */
    uint32_t         qkey;              /* Q Key for UD */
    int              trans;             /* QP transport */
    int              msg_size;          /* Message size */
    int              buf_size;          /* Buffer size */
    int              max_send_wr;       /* Maximum send work requests */
    int              max_recv_wr;       /* Maximum receive work requests */
    int              max_inline;        /* Maximum amount of inline data */
    char            *buffer;            /* Buffer */
    ibv_cc          *channel;           /* Channel */
    struct ibv_pd   *pd;                /* Protection domain */
    struct ibv_mr   *mr;                /* Memory region */
    struct ibv_cq   *cq;                /* Completion queue */
    struct ibv_qp   *qp;                /* Queue Pair */
    struct ibv_ah   *ah;                /* Address handle */
    struct ibv_srq  *srq;               /* Shared receive queue */
    ibv_xrc         *xrc;               /* XRC domain */
} DEVICE;


/*
 * Names associated with a value.  Element type of the CQErrors, Opcodes and
 * CMEvents tables.
 */
typedef struct NAMES {
    int     value;                      /* Value */
    char   *name;                       /* Name */
} NAMES;


/*
 * RDMA speeds and names.  Element type of the Rates table.
 */
typedef struct RATES {
    const char *name;                   /* Name */
    uint32_t    rate;                   /* Rate */
} RATES;


/*
 * Function prototypes.
 */
static void     atomic_seq(ATOMIC atomic, int i,
                                            uint64_t *value, uint64_t *args);
static void     cm_ack_event(DEVICE *dev);
static void     cm_close(DEVICE *dev);
static char    *cm_event_name(int event, char *data, int size);
static void     cm_expect_event(DEVICE *dev, int expected);
static void     cm_init(DEVICE *dev);
static void     cm_open(DEVICE *dev);
static void     cm_open_client(DEVICE *dev);
static void     cm_open_server(DEVICE *dev);
static void     cm_prep(DEVICE *dev);
static void     cq_error(int status);
static void     dec_node(NODE *host);
static void     do_error(int status, uint64_t *errors);
static void     enc_node(NODE *host);
static void     ib_client_atomic(ATOMIC atomic);
static void     ib_client_verify_atomic(ATOMIC atomic);
static void     ib_close1(DEVICE *dev);
static void     ib_close2(DEVICE *dev);
static void     ib_migrate(DEVICE *dev);
static void     ib_open(DEVICE *dev);
static void     ib_post_atomic(DEVICE *dev, ATOMIC atomic, int wrid,
                            int offset, uint64_t compare_add, uint64_t swap);
static void     ib_prep(DEVICE *dev);
static void     rd_bi_bw(int transport);
static void     rd_client_bw(int transport);
static void     rd_client_rdma_bw(int transport, ibv_op opcode);
static void     rd_client_rdma_read_lat(int transport);
static void     rd_close(DEVICE *dev);
static void     rd_mralloc(DEVICE *dev, int size);
static void     rd_mrfree(DEVICE *dev);
static void     rd_open(DEVICE *dev, int trans,
                                        int max_send_wr, int max_recv_wr);
static void     rd_params(int transport, long msg_size, int poll, int atomic);
static int      rd_poll(DEVICE *dev, struct ibv_wc *wc, int nwc);
static void     rd_post_rdma_std(DEVICE *dev, ibv_op opcode, int n);
static void     rd_post_recv_std(DEVICE *dev, int n);
static void     rd_post_send(DEVICE *dev, int off, int len,
                                            int inc, int rep, int stat);
static void     rd_post_send_std(DEVICE *dev, int n);
static void     rd_pp_lat(int transport, IOMODE iomode);
static void     rd_pp_lat_loop(DEVICE *dev, IOMODE iomode);
static void     rd_prep(DEVICE *dev, int size);
static void     rd_rdma_write_poll_lat(int transport);
static void     rd_server_def(int transport);
static void     rd_server_nop(int transport, int size);
static int      maybe(int val, char *msg);
static char    *opcode_name(int opcode);
static void     show_node_info(DEVICE *dev);


/*
 * List of errors we can get from a CQE.  Maps each work completion status to
 * a printable description.
 */
NAMES CQErrors[] ={
    { IBV_WC_SUCCESS,                   "Success"                       },
    { IBV_WC_LOC_LEN_ERR,               "Local length error"            },
    { IBV_WC_LOC_QP_OP_ERR,             "Local QP operation failure"    },
    { IBV_WC_LOC_EEC_OP_ERR,            "Local EEC operation failure"   },
    { IBV_WC_LOC_PROT_ERR,              "Local protection error"        },
    { IBV_WC_WR_FLUSH_ERR,              "WR flush failure"              },
    { IBV_WC_MW_BIND_ERR,               "Memory window bind failure"    },
    { IBV_WC_BAD_RESP_ERR,              "Bad response"                  },
    { IBV_WC_LOC_ACCESS_ERR,            "Local access failure"          },
    { IBV_WC_REM_INV_REQ_ERR,           "Remote invalid request"        },
    { IBV_WC_REM_ACCESS_ERR,            "Remote access failure"         },
    { IBV_WC_REM_OP_ERR,                "Remote operation failure"      },
    { IBV_WC_RETRY_EXC_ERR,             "Retries exceeded"              },
    { IBV_WC_RNR_RETRY_EXC_ERR,         "RNR retry exceeded"            },
    { IBV_WC_LOC_RDD_VIOL_ERR,          "Local RDD violation"           },
    { IBV_WC_REM_INV_RD_REQ_ERR,        "Remote invalid read request"   },
    { IBV_WC_REM_ABORT_ERR,             "Remote abort"                  },
    { IBV_WC_INV_EECN_ERR,              "Invalid EECN"                  },
    { IBV_WC_INV_EEC_STATE_ERR,         "Invalid EEC state"             },
    { IBV_WC_FATAL_ERR,                 "Fatal error"                   },
    { IBV_WC_RESP_TIMEOUT_ERR,          "Responder timeout"             },
    { IBV_WC_GENERAL_ERR,               "General error"                 },
};


/*
 * Opcodes.  Maps each work request opcode to a printable description.
 */
NAMES Opcodes[] ={
    { IBV_WR_ATOMIC_CMP_AND_SWP,        "compare and swap"              },
    { IBV_WR_ATOMIC_FETCH_AND_ADD,      "fetch and add"                 },
    { IBV_WR_RDMA_READ,                 "rdma read"                     },
    { IBV_WR_RDMA_WRITE,                "rdma write"                    },
    { IBV_WR_RDMA_WRITE_WITH_IMM,       "rdma write with immediate"     },
    { IBV_WR_SEND,                      "send"                          },
    { IBV_WR_SEND_WITH_IMM,             "send with immediate"           },
};


/*
 * Events from the Connection Manager.
 */
NAMES CMEvents[] ={
    { RDMA_CM_EVENT_ADDR_RESOLVED,      "Address resolved"              },
    { RDMA_CM_EVENT_ADDR_ERROR,         "Address error"                 },
    { RDMA_CM_EVENT_ROUTE_RESOLVED,     "Route resolved"                },
    { RDMA_CM_EVENT_ROUTE_ERROR,        "Route error"                   },
    { RDMA_CM_EVENT_CONNECT_REQUEST,    "Connect request"               },
    { RDMA_CM_EVENT_CONNECT_RESPONSE,   "Connect response"              },
    { RDMA_CM_EVENT_CONNECT_ERROR,      "Connect error"                 },
    { RDMA_CM_EVENT_UNREACHABLE,        "Event unreachable"             },
    { RDMA_CM_EVENT_REJECTED,           "Event rejected"                },
    { RDMA_CM_EVENT_ESTABLISHED,        "Event established"             },
    { RDMA_CM_EVENT_DISCONNECTED,       "Event disconnected"            },
    { RDMA_CM_EVENT_DEVICE_REMOVAL,     "Device removal"                },
    { RDMA_CM_EVENT_MULTICAST_JOIN,     "Multicast join"                },
    { RDMA_CM_EVENT_MULTICAST_ERROR,    "Multicast error"               },
};


/*
 * Static rates.  Maps a rate name (a link width/speed such as "4xDDR", or a
 * plain number such as "20") to the corresponding IBV_RATE_* value.  The
 * empty name and "max" both map to IBV_RATE_MAX.
 */
RATES Rates[] ={
    { "",       IBV_RATE_MAX        },
    { "max",    IBV_RATE_MAX        },
    { "1xSDR",  IBV_RATE_2_5_GBPS   },
    { "1xDDR",  IBV_RATE_5_GBPS     },
    { "1xQDR",  IBV_RATE_10_GBPS    },
    { "4xSDR",  IBV_RATE_10_GBPS    },
    { "4xDDR",  IBV_RATE_20_GBPS    },
    { "4xQDR",  IBV_RATE_40_GBPS    },
    { "8xSDR",  IBV_RATE_20_GBPS    },
    { "8xDDR",  IBV_RATE_40_GBPS    },
    { "8xQDR",  IBV_RATE_80_GBPS    },
    { "2.5",    IBV_RATE_2_5_GBPS   },
    { "5",      IBV_RATE_5_GBPS     },
    { "10",     IBV_RATE_10_GBPS    },
    { "20",     IBV_RATE_20_GBPS    },
    { "30",     IBV_RATE_30_GBPS    },
    { "40",     IBV_RATE_40_GBPS    },
    { "60",     IBV_RATE_60_GBPS    },
    { "80",     IBV_RATE_80_GBPS    },
    { "120",    IBV_RATE_120_GBPS   },
};


/*
 * This routine is never called and is solely to avoid compiler warnings for
 * functions that are not currently being used.  The call sits behind a
 * constant-false condition so it never executes.
 */
void
rdma_not_called(void)
{
    if (0)
        ib_migrate(NULL);
}


/*
 * Measure RC bi-directional bandwidth (client side).  Uses a 64 KiB message
 * size in the rd_params call and reports the results as BANDWIDTH.
 */
void
run_client_rc_bi_bw(void)
{
    par_use(L_ACCESS_RECV);
    par_use(R_ACCESS_RECV);
    rd_params(IBV_QPT_RC, K64, 1, 0);
    rd_bi_bw(IBV_QPT_RC);
    show_results(BANDWIDTH);
}


/*
 * Measure RC bi-directional bandwidth (server side).
 */
void
run_server_rc_bi_bw(void)
{
    rd_bi_bw(IBV_QPT_RC);
}


/*
 * Measure RC bandwidth (client side).
 */
void
run_client_rc_bw(void)
{
    par_use(L_ACCESS_RECV);
    par_use(R_ACCESS_RECV);
    par_use(L_NO_MSGS);
    par_use(R_NO_MSGS);
    rd_params(IBV_QPT_RC, K64, 1, 0);
    rd_client_bw(IBV_QPT_RC);
    show_results(BANDWIDTH);
}


/*
 * Measure RC bandwidth (server side).
 */
void
run_server_rc_bw(void)
{
    rd_server_def(IBV_QPT_RC);
}


/*
 * Measure RC compare and swap messaging rate (client side).
 */
void
run_client_rc_compare_swap_mr(void)
{
    ib_client_atomic(COMPARE_SWAP);
}


/*
 * Measure RC compare and swap messaging rate (server side).  The server
 * only provides a uint64_t sized buffer and otherwise stays idle.
 */
void
run_server_rc_compare_swap_mr(void)
{
    rd_server_nop(IBV_QPT_RC, sizeof(uint64_t));
}


/*
 * Measure RC fetch and add messaging rate (client side).
 */
void
run_client_rc_fetch_add_mr(void)
{
    ib_client_atomic(FETCH_ADD);
}


/*
 * Measure RC fetch and add messaging rate (server side).  The server only
 * provides a uint64_t sized buffer and otherwise stays idle.
 */
void
run_server_rc_fetch_add_mr(void)
{
    rd_server_nop(IBV_QPT_RC, sizeof(uint64_t));
}


/*
 * Measure RC latency (client side).  Ping-pong using send/receive with a
 * message size of 1 passed to rd_params.
 */
void
run_client_rc_lat(void)
{
    rd_params(IBV_QPT_RC, 1, 1, 0);
    rd_pp_lat(IBV_QPT_RC, IO_SR);
}


/*
 * Measure RC latency (server side).
 */
void
run_server_rc_lat(void)
{
    rd_pp_lat(IBV_QPT_RC, IO_SR);
}


/*
 * Measure RC RDMA read bandwidth (client side).
 */
void
run_client_rc_rdma_read_bw(void)
{
    par_use(L_ACCESS_RECV);
    par_use(R_ACCESS_RECV);
    par_use(L_RD_ATOMIC);
    par_use(R_RD_ATOMIC);
    rd_params(IBV_QPT_RC, K64, 1, 0);
    rd_client_rdma_bw(IBV_QPT_RC, IBV_WR_RDMA_READ);
    show_results(BANDWIDTH);
}


/*
 * Measure RC RDMA read bandwidth (server side).  A size of 0 makes rd_prep
 * use the requested message size for the buffer.
 */
void
run_server_rc_rdma_read_bw(void)
{
    rd_server_nop(IBV_QPT_RC, 0);
}


/*
 * Measure RC RDMA read latency (client side).
 */
void
run_client_rc_rdma_read_lat(void)
{
    rd_params(IBV_QPT_RC, 1, 1, 0);
    rd_client_rdma_read_lat(IBV_QPT_RC);
}


/*
 * Measure RC RDMA read latency (server side).
 */
void
run_server_rc_rdma_read_lat(void)
{
    rd_server_nop(IBV_QPT_RC, 0);
}


/*
 * Measure RC RDMA write bandwidth (client side).
 */
void
run_client_rc_rdma_write_bw(void)
{
    rd_params(IBV_QPT_RC, K64, 1, 0);
    rd_client_rdma_bw(IBV_QPT_RC, IBV_WR_RDMA_WRITE_WITH_IMM);
    show_results(BANDWIDTH);
}


/*
 * Measure RC RDMA write bandwidth (server side).  The client writes with
 * immediate data, so the server runs the default receive loop.
 */
void
run_server_rc_rdma_write_bw(void)
{
    rd_server_def(IBV_QPT_RC);
}


/*
 * Measure RC RDMA write latency (client side).
 */
void
run_client_rc_rdma_write_lat(void)
{
    rd_params(IBV_QPT_RC, 1, 1, 0);
    rd_pp_lat(IBV_QPT_RC, IO_RDMA);
}


/*
 * Measure RC RDMA write latency (server side).
 */
void
run_server_rc_rdma_write_lat(void)
{
    rd_pp_lat(IBV_QPT_RC, IO_RDMA);
}


/*
 * Measure RC RDMA write polling latency (client side).  The poll argument to
 * rd_params is 0 here, so the poll mode parameters are not marked as used.
 */
void
run_client_rc_rdma_write_poll_lat(void)
{
    rd_params(IBV_QPT_RC, 1, 0, 0);
    rd_rdma_write_poll_lat(IBV_QPT_RC);
    show_results(LATENCY);
}


/*
 * Measure RC RDMA write polling latency (server side).
 */
void
run_server_rc_rdma_write_poll_lat(void)
{
    rd_rdma_write_poll_lat(IBV_QPT_RC);
}


/*
 * Measure UC bi-directional bandwidth (client side).
 */
void
run_client_uc_bi_bw(void)
{
    par_use(L_ACCESS_RECV);
    par_use(R_ACCESS_RECV);
    rd_params(IBV_QPT_UC, K64, 1, 0);
    rd_bi_bw(IBV_QPT_UC);
    show_results(BANDWIDTH_SR);
}


/*
 * Measure UC bi-directional bandwidth (server side).
 */
void
run_server_uc_bi_bw(void)
{
    rd_bi_bw(IBV_QPT_UC);
}


/*
 * Measure UC bandwidth (client side).
 */
void
run_client_uc_bw(void)
{
    par_use(L_ACCESS_RECV);
    par_use(R_ACCESS_RECV);
    par_use(L_NO_MSGS);
    par_use(R_NO_MSGS);
    rd_params(IBV_QPT_UC, K64, 1, 0);
    rd_client_bw(IBV_QPT_UC);
    show_results(BANDWIDTH_SR);
}


/*
 * Measure UC bandwidth (server side).
 */
void
run_server_uc_bw(void)
{
    rd_server_def(IBV_QPT_UC);
}


/*
 * Measure UC latency (client side).
 */
void
run_client_uc_lat(void)
{
    rd_params(IBV_QPT_UC, 1, 1, 0);
    rd_pp_lat(IBV_QPT_UC, IO_SR);
}


/*
 * Measure UC latency (server side).
 */
void
run_server_uc_lat(void)
{
    rd_pp_lat(IBV_QPT_UC, IO_SR);
}


/*
 * Measure UC RDMA write bandwidth (client side).
 */
void
run_client_uc_rdma_write_bw(void)
{
    rd_params(IBV_QPT_UC, K64, 1, 0);
    rd_client_rdma_bw(IBV_QPT_UC, IBV_WR_RDMA_WRITE_WITH_IMM);
    show_results(BANDWIDTH_SR);
}


/*
 * Measure UC RDMA write bandwidth (server side).
 */
void
run_server_uc_rdma_write_bw(void)
{
    rd_server_def(IBV_QPT_UC);
}


/*
 * Measure UC RDMA write latency (client side).
 */
void
run_client_uc_rdma_write_lat(void)
{
    rd_params(IBV_QPT_UC, 1, 1, 0);
    rd_pp_lat(IBV_QPT_UC, IO_RDMA);
}


/*
 * Measure UC RDMA write latency (server side).
 */
void
run_server_uc_rdma_write_lat(void)
{
    rd_pp_lat(IBV_QPT_UC, IO_RDMA);
}


/*
 * Measure UC RDMA write polling latency (client side).
 * NOTE(review): run_client_rc_rdma_write_poll_lat passes poll=0 to rd_params
 * while this UC variant passes poll=1; confirm which is intended.
 */
void
run_client_uc_rdma_write_poll_lat(void)
{
    rd_params(IBV_QPT_UC, 1, 1, 0);
    rd_rdma_write_poll_lat(IBV_QPT_UC);
    show_results(LATENCY);
}


/*
 * Measure UC RDMA write polling latency (server side).
 */
void
run_server_uc_rdma_write_poll_lat(void)
{
    rd_rdma_write_poll_lat(IBV_QPT_UC);
}


/*
 * Measure UD bi-directional bandwidth (client side).  UD tests pass a 2 KiB
 * message size to rd_params.
 */
void
run_client_ud_bi_bw(void)
{
    par_use(L_ACCESS_RECV);
    par_use(R_ACCESS_RECV);
    rd_params(IBV_QPT_UD, K2, 1, 0);
    rd_bi_bw(IBV_QPT_UD);
    show_results(BANDWIDTH_SR);
}


/*
 * Measure UD bi-directional bandwidth (server side).
 */
void
run_server_ud_bi_bw(void)
{
    rd_bi_bw(IBV_QPT_UD);
}


/*
 * Measure UD bandwidth (client side).
 */
void
run_client_ud_bw(void)
{
    par_use(L_ACCESS_RECV);
    par_use(R_ACCESS_RECV);
    par_use(L_NO_MSGS);
    par_use(R_NO_MSGS);
    rd_params(IBV_QPT_UD, K2, 1, 0);
    rd_client_bw(IBV_QPT_UD);
    show_results(BANDWIDTH_SR);
}


/*
 * Measure UD bandwidth (server side).
 */
void
run_server_ud_bw(void)
{
    rd_server_def(IBV_QPT_UD);
}


/*
 * Measure UD latency (client side).
 */
void
run_client_ud_lat(void)
{
    rd_params(IBV_QPT_UD, 1, 1, 0);
    rd_pp_lat(IBV_QPT_UD, IO_SR);
}


/*
 * Measure UD latency (server side).
 */
void
run_server_ud_lat(void)
{
    rd_pp_lat(IBV_QPT_UD, IO_SR);
}


#ifdef HAS_XRC

/*
 * Measure XRC bi-directional bandwidth (client side).
 */
void
run_client_xrc_bi_bw(void)
{
    par_use(L_ACCESS_RECV);
    par_use(R_ACCESS_RECV);
    rd_params(IBV_QPT_XRC, K64, 1, 0);
    rd_bi_bw(IBV_QPT_XRC);
    show_results(BANDWIDTH);
}


/*
 * Measure XRC bi-directional bandwidth (server side).
 */
void
run_server_xrc_bi_bw(void)
{
    rd_bi_bw(IBV_QPT_XRC);
}


/*
 * Measure XRC bandwidth (client side).
 */
void
run_client_xrc_bw(void)
{
    par_use(L_ACCESS_RECV);
    par_use(R_ACCESS_RECV);
    par_use(L_NO_MSGS);
    par_use(R_NO_MSGS);
    rd_params(IBV_QPT_XRC, K64, 1, 0);
    rd_client_bw(IBV_QPT_XRC);
    show_results(BANDWIDTH);
}


/*
 * Measure XRC bandwidth (server side).
 */
void
run_server_xrc_bw(void)
{
    rd_server_def(IBV_QPT_XRC);
}


/*
 * Measure XRC latency (client side).
 */
void
run_client_xrc_lat(void)
{
    rd_params(IBV_QPT_XRC, 1, 1, 0);
    rd_pp_lat(IBV_QPT_XRC, IO_SR);
}


/*
 * Measure XRC latency (server side).
 */
void
run_server_xrc_lat(void)
{
    rd_pp_lat(IBV_QPT_XRC, IO_SR);
}

#endif /* HAS_XRC */


/*
 * Verify RC compare and swap (client side).
 */
void
run_client_ver_rc_compare_swap(void)
{
    ib_client_verify_atomic(COMPARE_SWAP);
}


/*
 * Verify RC compare and swap (server side).  The server only provides a
 * uint64_t sized buffer and otherwise stays idle.
 */
void
run_server_ver_rc_compare_swap(void)
{
    rd_server_nop(IBV_QPT_RC, sizeof(uint64_t));
}


/*
 * Verify RC fetch and add (client side).
 */
void
run_client_ver_rc_fetch_add(void)
{
    ib_client_verify_atomic(FETCH_ADD);
}


/*
 * Verify RC fetch and add (server side).  The server only provides a
 * uint64_t sized buffer and otherwise stays idle.
 */
void
run_server_ver_rc_fetch_add(void)
{
    rd_server_nop(IBV_QPT_RC, sizeof(uint64_t));
}


/*
 * Measure RDMA bandwidth (client side).
*/ static void rd_client_bw(int transport) { DEVICE dev; long sent = 0; rd_open(&dev, transport, NCQE, 0); rd_prep(&dev, 0); sync_test(); rd_post_send_std(&dev, left_to_send(&sent, NCQE)); sent = NCQE; while (!Finished) { int i; struct ibv_wc wc[NCQE]; int n = rd_poll(&dev, wc, cardof(wc)); if (n > LStat.max_cqes) LStat.max_cqes = n; if (Finished) break; for (i = 0; i < n; ++i) { int id = wc[i].wr_id; int status = wc[i].status; if (id != WRID_SEND) debug("bad WR ID %d", id); else if (status != IBV_WC_SUCCESS) do_error(status, &LStat.s.no_errs); } if (Req.no_msgs) { if (LStat.s.no_msgs + LStat.s.no_errs >= Req.no_msgs) break; n = left_to_send(&sent, n); } rd_post_send_std(&dev, n); sent += n; } stop_test_timer(); exchange_results(); rd_close(&dev); } /* * Default action for the server is to post receive buffers and whenever it * gets a completion entry, compute statistics and post more buffers. */ static void rd_server_def(int transport) { DEVICE dev; rd_open(&dev, transport, 0, NCQE); rd_prep(&dev, 0); rd_post_recv_std(&dev, NCQE); sync_test(); while (!Finished) { int i; struct ibv_wc wc[NCQE]; int n = rd_poll(&dev, wc, cardof(wc)); if (Finished) break; if (n > LStat.max_cqes) LStat.max_cqes = n; for (i = 0; i < n; ++i) { int status = wc[i].status; if (status == IBV_WC_SUCCESS) { LStat.r.no_bytes += dev.msg_size; LStat.r.no_msgs++; if (Req.access_recv) touch_data(dev.buffer, dev.msg_size); } else do_error(status, &LStat.r.no_errs); } if (Req.no_msgs) if (LStat.r.no_msgs + LStat.r.no_errs >= Req.no_msgs) break; rd_post_recv_std(&dev, n); } stop_test_timer(); exchange_results(); rd_close(&dev); } /* * Measure bi-directional RDMA bandwidth. 
*/ static void rd_bi_bw(int transport) { DEVICE dev; rd_open(&dev, transport, NCQE, NCQE); rd_prep(&dev, 0); rd_post_recv_std(&dev, NCQE); sync_test(); rd_post_send_std(&dev, NCQE); while (!Finished) { int i; struct ibv_wc wc[NCQE]; int numSent = 0; int numRecv = 0; int n = rd_poll(&dev, wc, cardof(wc)); if (Finished) break; if (n > LStat.max_cqes) LStat.max_cqes = n; for (i = 0; i < n; ++i) { int id = wc[i].wr_id; int status = wc[i].status; switch (id) { case WRID_SEND: if (status != IBV_WC_SUCCESS) do_error(status, &LStat.s.no_errs); ++numSent; break; case WRID_RECV: if (status == IBV_WC_SUCCESS) { LStat.r.no_bytes += dev.msg_size; LStat.r.no_msgs++; if (Req.access_recv) touch_data(dev.buffer, dev.msg_size); } else do_error(status, &LStat.r.no_errs); ++numRecv; break; default: debug("bad WR ID %d", id); } } if (numRecv) rd_post_recv_std(&dev, numRecv); if (numSent) rd_post_send_std(&dev, numSent); } stop_test_timer(); exchange_results(); rd_close(&dev); } /* * Measure ping-pong latency (client and server side). */ static void rd_pp_lat(int transport, IOMODE iomode) { DEVICE dev; rd_open(&dev, transport, 1, 1); rd_prep(&dev, 0); rd_pp_lat_loop(&dev, iomode); stop_test_timer(); exchange_results(); rd_close(&dev); if (is_client()) show_results(LATENCY); } /* * Loop sending packets back and forth to measure ping-pong latency. 
*/ static void rd_pp_lat_loop(DEVICE *dev, IOMODE iomode) { int done = 1; rd_post_recv_std(dev, 1); sync_test(); if (is_client()) { if (iomode == IO_SR) rd_post_send_std(dev, 1); else rd_post_rdma_std(dev, IBV_WR_RDMA_WRITE_WITH_IMM, 1); done = 0; } while (!Finished) { int i; struct ibv_wc wc[2]; int n = rd_poll(dev, wc, cardof(wc)); if (Finished) break; for (i = 0; i < n; ++i) { int id = wc[i].wr_id; int status = wc[i].status; switch (id) { case WRID_SEND: case WRID_RDMA: if (status != IBV_WC_SUCCESS) do_error(status, &LStat.s.no_errs); done |= 1; continue; case WRID_RECV: if (status == IBV_WC_SUCCESS) { LStat.r.no_bytes += dev->msg_size; LStat.r.no_msgs++; rd_post_recv_std(dev, 1); } else do_error(status, &LStat.r.no_errs); done |= 2; continue; default: debug("bad WR ID %d", id); continue; } break; } if (done == 3) { if (iomode == IO_SR) rd_post_send_std(dev, 1); else rd_post_rdma_std(dev, IBV_WR_RDMA_WRITE_WITH_IMM, 1); done = 0; } } } /* * Loop sending packets back and forth using RDMA Write and polling to measure * latency. This is the strategy used by some of the MPIs. Note that it does * not matter what characters clientid and serverid are set to as long as they * are different. Note also that we must set *p and *q before calling * sync_test to avoid a race condition. 
 */
static void
rd_rdma_write_poll_lat(int transport)
{
    DEVICE dev;
    volatile unsigned char *p, *q;      /* First and last byte of message */
    int send, locid, remid;
    int clientid = 0x55;
    int serverid = 0xaa;

    /* The client sends first; each side knows its own and the peer's id */
    if (is_client())
        send = 1, locid = clientid, remid = serverid;
    else
        send = 0, locid = serverid, remid = clientid;
    rd_open(&dev, transport, NCQE, 0);
    rd_prep(&dev, 0);
    p = (unsigned char *)dev.buffer;
    q = p + dev.msg_size-1;
    *p = locid;
    *q = locid;
    sync_test();
    while (!Finished) {
        if (send) {
            int i;
            int n;
            struct ibv_wc wc[2];

            rd_post_rdma_std(&dev, IBV_WR_RDMA_WRITE, 1);
            if (Finished)
                break;
            /* Non-blocking poll: reap whatever send completions are ready */
            n = ibv_poll_cq(dev.cq, cardof(wc), wc);
            if (n < 0)
                error(SYS, "CQ poll failed");
            for (i = 0; i < n; ++i) {
                int id = wc[i].wr_id;
                int status = wc[i].status;

                if (id != WRID_RDMA)
                    debug("bad WR ID %d", id);
                else if (status != IBV_WC_SUCCESS)
                    do_error(status, &LStat.s.no_errs);
            }
        }
        /* Spin until both marker bytes carry the peer's id */
        while (!Finished)
            if (*p == remid && *q == remid)
                break;
        LStat.r.no_bytes += dev.msg_size;
        LStat.r.no_msgs++;
        /* Restore our own id so the next arrival can be detected */
        *p = locid;
        *q = locid;
        send = 1;
    }
    stop_test_timer();
    exchange_results();
    rd_close(&dev);
}


/*
 * Measure RDMA Read latency (client side).  Exactly one RDMA read is kept
 * outstanding: a new one is posted each time the previous one completes,
 * whether it succeeded or failed.
 */
static void
rd_client_rdma_read_lat(int transport)
{
    DEVICE dev;

    rd_open(&dev, transport, 1, 0);
    rd_prep(&dev, 0);
    sync_test();
    rd_post_rdma_std(&dev, IBV_WR_RDMA_READ, 1);
    while (!Finished) {
        struct ibv_wc wc;
        int n = rd_poll(&dev, &wc, 1);

        if (n == 0)
            continue;
        if (Finished)
            break;
        if (wc.wr_id != WRID_RDMA) {
            debug("bad WR ID %d", (int)wc.wr_id);
            continue;
        }
        if (wc.status == IBV_WC_SUCCESS) {
            /* A completed read counts as our receive and the peer's send */
            LStat.r.no_bytes += dev.msg_size;
            LStat.r.no_msgs++;
            LStat.rem_s.no_bytes += dev.msg_size;
            LStat.rem_s.no_msgs++;
        } else
            do_error(wc.status, &LStat.s.no_errs);
        rd_post_rdma_std(&dev, IBV_WR_RDMA_READ, 1);
    }
    stop_test_timer();
    exchange_results();
    rd_close(&dev);
    show_results(LATENCY);
}


/*
 * Measure RDMA bandwidth (client side).
*/ static void rd_client_rdma_bw(int transport, ibv_op opcode) { DEVICE dev; rd_open(&dev, transport, NCQE, 0); rd_prep(&dev, 0); sync_test(); rd_post_rdma_std(&dev, opcode, NCQE); while (!Finished) { int i; struct ibv_wc wc[NCQE]; int n = rd_poll(&dev, wc, cardof(wc)); if (Finished) break; if (n > LStat.max_cqes) LStat.max_cqes = n; for (i = 0; i < n; ++i) { int status = wc[i].status; if (status == IBV_WC_SUCCESS) { if (opcode == IBV_WR_RDMA_READ) { LStat.r.no_bytes += dev.msg_size; LStat.r.no_msgs++; LStat.rem_s.no_bytes += dev.msg_size; LStat.rem_s.no_msgs++; if (Req.access_recv) touch_data(dev.buffer, dev.msg_size); } } else do_error(status, &LStat.s.no_errs); } rd_post_rdma_std(&dev, opcode, n); } stop_test_timer(); exchange_results(); rd_close(&dev); } /* * Server just waits and lets driver take care of any requests. */ static void rd_server_nop(int transport, int size) { DEVICE dev; /* workaround: Size of RQ should be 0; bug in Mellanox driver */ rd_open(&dev, transport, 0, 1); rd_prep(&dev, size); sync_test(); while (!Finished) pause(); stop_test_timer(); exchange_results(); rd_close(&dev); } /* * Measure messaging rate for an atomic operation. */ static void ib_client_atomic(ATOMIC atomic) { int i; DEVICE dev; rd_params(IBV_QPT_RC, 0, 1, 1); rd_open(&dev, IBV_QPT_RC, NCQE, 0); rd_prep(&dev, sizeof(uint64_t)); sync_test(); for (i = 0; i < NCQE; ++i) { if (Finished) break; ib_post_atomic(&dev, atomic, 0, 0, 0, 0); } while (!Finished) { struct ibv_wc wc[NCQE]; int n = rd_poll(&dev, wc, cardof(wc)); if (Finished) break; if (n > LStat.max_cqes) LStat.max_cqes = n; for (i = 0; i < n; ++i) { int status = wc[i].status; if (status == IBV_WC_SUCCESS) { LStat.rem_r.no_bytes += sizeof(uint64_t); LStat.rem_r.no_msgs++; } else do_error(status, &LStat.s.no_errs); ib_post_atomic(&dev, atomic, 0, 0, 0, 0); } } stop_test_timer(); exchange_results(); rd_close(&dev); show_results(MSG_RATE); } /* * Verify RC compare and swap (client side). 
*/ static void ib_client_verify_atomic(ATOMIC atomic) { int i; int slots; DEVICE dev; int head = 0; int tail = 0; uint64_t args[2] = {0}; rd_params(IBV_QPT_RC, K64, 1, 1); rd_open(&dev, IBV_QPT_RC, NCQE, 0); slots = Req.msg_size / sizeof(uint64_t); if (slots < 1) error(0, "message size must be at least %d", sizeof(uint64_t)); if (slots > NCQE) slots = NCQE; rd_prep(&dev, 0); sync_test(); for (i = 0; i < slots; ++i) { if (Finished) break; atomic_seq(atomic, head++, 0, args); ib_post_atomic(&dev, atomic, i, i*sizeof(uint64_t), args[0], args[1]); } while (!Finished) { struct ibv_wc wc[NCQE]; int n = rd_poll(&dev, wc, cardof(wc)); if (Finished) break; if (n > LStat.max_cqes) LStat.max_cqes = n; for (i = 0; i < n; ++i) { uint64_t seen; uint64_t want = 0; int x = wc[i].wr_id; int status = wc[i].status; if (status == IBV_WC_SUCCESS) { LStat.rem_r.no_bytes += sizeof(uint64_t); LStat.rem_r.no_msgs++; } else do_error(status, &LStat.s.no_errs); atomic_seq(atomic, tail++, &want, 0); seen = ((uint64_t *)dev.buffer)[x]; if (seen != want) { error(0, "mismatch, sequence %d, expected %llx, got %llx", tail, (long long)want, (long long)seen); } atomic_seq(atomic, head++, 0, args); ib_post_atomic(&dev, atomic, x, x*sizeof(uint64_t), args[0], args[1]); } } stop_test_timer(); exchange_results(); rd_close(&dev); show_results(MSG_RATE); } /* * Given an atomic operation and an index, return the next value associated * with that index and the arguments we might pass to post that atomic. */ static void atomic_seq(ATOMIC atomic, int i, uint64_t *value, uint64_t *args) { if (atomic == COMPARE_SWAP) { uint64_t v; uint64_t magic = 0x0123456789abcdefULL; v = i ? magic + i-1 : 0; if (value) *value = v; if (args) { args[0] = v; args[1] = magic + i; } } else if (atomic == FETCH_ADD) { if (value) *value = i; if (args) args[0] = 1; } } /* * Set default parameters. 
*/ static void rd_params(int transport, long msg_size, int poll, int atomic) { //if (transport == IBV_QPT_RC || transport == IBV_QPT_UD) { if (transport == IBV_QPT_RC) { par_use(L_USE_CM); par_use(R_USE_CM); } else { setv_u32(L_USE_CM, 0); setv_u32(R_USE_CM, 0); } if (!Req.use_cm) { setp_u32(0, L_MTU_SIZE, MTU_SIZE); setp_u32(0, R_MTU_SIZE, MTU_SIZE); par_use(L_ID); par_use(R_ID); par_use(L_SL); par_use(R_SL); par_use(L_STATIC_RATE); par_use(R_STATIC_RATE); par_use(L_SRC_PATH_BITS); par_use(R_SRC_PATH_BITS); } if (msg_size) { setp_u32(0, L_MSG_SIZE, msg_size); setp_u32(0, R_MSG_SIZE, msg_size); } if (poll) { par_use(L_POLL_MODE); par_use(R_POLL_MODE); } if (atomic) { par_use(L_RD_ATOMIC); par_use(R_RD_ATOMIC); } opt_check(); } /* * Open a RDMA device. */ static void rd_open(DEVICE *dev, int trans, int max_send_wr, int max_recv_wr) { /* Send request to client */ if (is_client()) client_send_request(); /* Clear structure */ memset(dev, 0, sizeof(*dev)); /* Set transport type and maximum work request parameters */ dev->trans = trans; dev->max_send_wr = max_send_wr; dev->max_recv_wr = max_recv_wr; /* Open device */ if (Req.use_cm) cm_open(dev); else ib_open(dev); /* Get QP attributes */ { struct ibv_qp_attr qp_attr; struct ibv_qp_init_attr qp_init_attr; if (ibv_query_qp(dev->qp, &qp_attr, IBV_QP_CAP, &qp_init_attr) != 0) error(SYS, "query QP failed"); dev->max_inline = qp_attr.cap.max_inline_data; } } /* * Called after rd_open to prepare both ends. 
*/ static void rd_prep(DEVICE *dev, int size) { /* Set the size of the messages we transfer */ if (size == 0) dev->msg_size = Req.msg_size; /* Allocate memory region */ if (size == 0) size = dev->msg_size; if (dev->trans == IBV_QPT_UD) size += GRH_SIZE; rd_mralloc(dev, size); /* Exchange node information */ { NODE node; enc_init(&node); enc_node(&dev->lnode); send_mesg(&node, sizeof(node), "node information"); recv_mesg(&node, sizeof(node), "node information"); dec_init(&node); dec_node(&dev->rnode); } /* Second phase of open for devices */ if (Req.use_cm) cm_prep(dev); else ib_prep(dev); /* Request CQ notification if not polling */ if (!Req.poll_mode) { if (ibv_req_notify_cq(dev->cq, 0) != 0) error(SYS, "failed to request CQ notification"); } /* Show node information if debugging */ show_node_info(dev); } /* * Show node information when debugging. */ static void show_node_info(DEVICE *dev) { NODE *n; if (!Debug) return; n = &dev->lnode; if (Req.use_cm) debug("L: rkey=%08x vaddr=%010x", n->rkey, n->vaddr); #ifdef HAS_XRC else if (dev->trans == IBV_QPT_XRC) { debug("L: lid=%04x qpn=%06x psn=%06x rkey=%08x vaddr=%010x srqn=%08x", n->lid, n->qpn, n->psn, n->rkey, n->vaddr, n->srqn); } #endif else { debug("L: lid=%04x qpn=%06x psn=%06x rkey=%08x vaddr=%010x", n->lid, n->qpn, n->psn, n->rkey, n->vaddr); } n = &dev->rnode; if (Req.use_cm) debug("R: rkey=%08x vaddr=%010x", n->rkey, n->vaddr); #ifdef HAS_XRC else if (dev->trans == IBV_QPT_XRC) { debug("R: lid=%04x qpn=%06x psn=%06x rkey=%08x vaddr=%010x srqn=%08x", n->lid, n->qpn, n->psn, n->rkey, n->vaddr); } #endif else { debug("R: lid=%04x qpn=%06x psn=%06x rkey=%08x vaddr=%010x", n->lid, n->qpn, n->psn, n->rkey, n->vaddr, n->srqn); } } /* * Close a RDMA device. We must destroy the CQ before the QP otherwise the * ibv_destroy_qp call seems to sometimes hang. We must also destroy the QP * before destroying the memory region as we cannot destroy the memory region * if there are references still outstanding. 
Hopefully we now have things in * the right order. */ static void rd_close(DEVICE *dev) { if (Req.use_cm) cm_close(dev); else ib_close1(dev); if (dev->ah) ibv_destroy_ah(dev->ah); if (dev->cq) ibv_destroy_cq(dev->cq); if (dev->pd) ibv_dealloc_pd(dev->pd); if (dev->channel) ibv_destroy_comp_channel(dev->channel); rd_mrfree(dev); if (!Req.use_cm) ib_close2(dev); memset(dev, 0, sizeof(*dev)); } /* * Create a queue pair. */ static void rd_create_qp(DEVICE *dev, struct ibv_context *context, struct rdma_cm_id *id) { /* Set up and verify rd_atomic parameters */ { struct ibv_device_attr dev_attr; if (ibv_query_device(context, &dev_attr) != SUCCESS0) error(SYS, "query device failed"); if (Req.rd_atomic == 0) dev->lnode.rd_atomic = dev_attr.max_qp_rd_atom; else if (Req.rd_atomic <= dev_attr.max_qp_rd_atom) dev->lnode.rd_atomic = Req.rd_atomic; else error(0, "device only supports %d (< %d) RDMA reads or atomics", dev_attr.max_qp_rd_atom, Req.rd_atomic); } /* Allocate completion channel */ dev->channel = ibv_create_comp_channel(context); if (!dev->channel) error(SYS, "failed to create completion channel"); /* Allocate protection domain */ dev->pd = ibv_alloc_pd(context); if (!dev->pd) error(SYS, "failed to allocate protection domain"); /* Create completion queue */ dev->cq = ibv_create_cq(context, dev->max_send_wr+dev->max_recv_wr, 0, dev->channel, 0); if (!dev->cq) error(SYS, "failed to create completion queue"); /* Create queue pair */ { struct ibv_qp_init_attr qp_attr ={ .send_cq = dev->cq, .recv_cq = dev->cq, .cap ={ .max_send_wr = dev->max_send_wr, .max_recv_wr = dev->max_recv_wr, .max_send_sge = 1, .max_recv_sge = 1, }, .qp_type = dev->trans }; if (Req.use_cm) { if (rdma_create_qp(id, dev->pd, &qp_attr) != 0) error(SYS, "failed to create QP"); dev->qp = id->qp; } else { #ifdef HAS_XRC if (dev->trans == IBV_QPT_XRC) { struct ibv_srq_init_attr srq_attr ={ .attr ={ .max_wr = dev->max_recv_wr, .max_sge = 1 } }; dev->xrc = ibv_open_xrc_domain(context, -1, O_CREAT); if 
(!dev->xrc) error(SYS, "failed to open XRC domain"); dev->srq = ibv_create_xrc_srq(dev->pd, dev->xrc, dev->cq, &srq_attr); if (!dev->srq) error(SYS, "failed to create SRQ"); qp_attr.cap.max_recv_wr = 0; qp_attr.cap.max_recv_sge = 0; qp_attr.xrc_domain = dev->xrc; } #endif /* HAS_XRC */ dev->qp = ibv_create_qp(dev->pd, &qp_attr); if (!dev->qp) error(SYS, "failed to create QP"); } } } /* * Allocate a memory region and register it. I thought this routine should * never be called with a size of 0 as prior code checks for that and sets it * to some default value. I appear to be wrong. In that case, size is set to * 1 so other code does not break. */ static void rd_mralloc(DEVICE *dev, int size) { int flags; int pagesize; if (dev->buffer) error(BUG, "rd_mralloc: memory region already allocated"); if (size == 0) size = 1; pagesize = sysconf(_SC_PAGESIZE); if (posix_memalign((void **)&dev->buffer, pagesize, size) != 0) error(SYS, "failed to allocate memory"); memset(dev->buffer, 0, size); dev->buf_size = size; flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC; dev->mr = ibv_reg_mr(dev->pd, dev->buffer, size, flags); if (!dev->mr) error(SYS, "failed to allocate memory region"); dev->lnode.rkey = dev->mr->rkey; dev->lnode.vaddr = (unsigned long)dev->buffer; } /* * Free the memory region. */ static void rd_mrfree(DEVICE *dev) { if (dev->mr) ibv_dereg_mr(dev->mr); dev->mr = NULL; if (dev->buffer) free(dev->buffer); dev->buffer = NULL; dev->buf_size = 0; dev->lnode.rkey = 0; dev->lnode.vaddr = 0; } /* * Open a device using the Connection Manager. */ static void cm_open(DEVICE *dev) { cm_init(dev); if (is_client()) cm_open_client(dev); else cm_open_server(dev); } /* * Open a channel to report communication events and allocate a communication * id. */ static void cm_init(DEVICE *dev) { CMINFO *cm = &dev->cm; int portspace = (dev->trans == IBV_QPT_RC) ? 
RDMA_PS_TCP : RDMA_PS_UDP; cm->channel = rdma_create_event_channel(); if (!cm->channel) error(0, "rdma_create_event_channel failed"); if (rdma_create_id(cm->channel, &cm->id, 0, portspace) != 0) error(0, "rdma_create_id failed"); } /* * Open a device using the Connection Manager when we are the client. */ static void cm_open_client(DEVICE *dev) { AI *aip; uint32_t port; struct addrinfo hints ={ .ai_family = AF_INET, .ai_socktype = SOCK_STREAM }; int timeout = Req.timeout * 1000; CMINFO *cm = &dev->cm; recv_mesg(&port, sizeof(port), "RDMA CM TCP IPv4 server port"); port = decode_uint32(&port); aip = getaddrinfo_port(ServerName, port, &hints); cm_init(dev); if (rdma_resolve_addr(cm->id, 0, (SA *)aip->ai_addr, timeout) != 0) error(0, "rdma_resolve_addr failed"); freeaddrinfo(aip); cm_expect_event(dev, RDMA_CM_EVENT_ADDR_RESOLVED); cm_ack_event(dev); if (rdma_resolve_route(cm->id, timeout) != 0) error(0, "rdma_resolve_route failed"); cm_expect_event(dev, RDMA_CM_EVENT_ROUTE_RESOLVED); cm_ack_event(dev); rd_create_qp(dev, cm->id->verbs, cm->id); if (dev->trans == IBV_QPT_RC) { struct rdma_conn_param param ={ .responder_resources = 1, .initiator_depth = 1, .rnr_retry_count = RNR_RETRY_CNT, .retry_count = RETRY_CNT }; if (rdma_connect(cm->id, ¶m) != 0) error(0, "rdma_connect failed"); cm_expect_event(dev, RDMA_CM_EVENT_ESTABLISHED); cm_ack_event(dev); } else if (dev->trans == IBV_QPT_UD) { struct rdma_conn_param param ={ .qp_num = cm->id->qp->qp_num }; if (rdma_connect(cm->id, ¶m) != 0) error(0, "rdma_connect failed"); cm_expect_event(dev, RDMA_CM_EVENT_ESTABLISHED); dev->qkey = cm->event->param.ud.qkey; dev->ah = ibv_create_ah(dev->pd, &cm->event->param.ud.ah_attr); if (!dev->ah) error(SYS, "failed to create address handle"); cm_ack_event(dev); } else error(BUG, "cm_open_client: bad transport: %d", dev->trans); } /* * Open a device using the Connection Manager when we are the client. 
*/ static void cm_open_server(DEVICE *dev) { uint32_t port; struct sockaddr_in saddr ={ .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_ANY), .sin_port = htons(0) }; CMINFO *cm = &dev->cm; if (rdma_bind_addr(cm->id, (SA *)&saddr) != 0) error(0, "rdma_bind_addr failed"); port = ntohs(rdma_get_src_port(cm->id)); encode_uint32(&port, port); send_mesg(&port, sizeof(port), "RDMA CM TCP IPv4 server port"); if (rdma_listen(cm->id, 0) != 0) error(0, "rdma_listen failed"); cm_expect_event(dev, RDMA_CM_EVENT_CONNECT_REQUEST); rd_create_qp(dev, cm->event->id->verbs, cm->event->id); if (dev->trans == IBV_QPT_RC) { struct rdma_conn_param param ={ .responder_resources = 1, .initiator_depth = 1, .rnr_retry_count = RNR_RETRY_CNT, .retry_count = RETRY_CNT }; struct ibv_qp_attr rtr_attr ={ .min_rnr_timer = MIN_RNR_TIMER, }; if (rdma_accept(cm->event->id, ¶m) != 0) error(0, "rdma_accept failed"); cm_ack_event(dev); cm_expect_event(dev, RDMA_CM_EVENT_ESTABLISHED); cm_ack_event(dev); /* Do not complain on error as we might be on a iWARP device */ ibv_modify_qp(dev->qp, &rtr_attr, IBV_QP_MIN_RNR_TIMER); } else if (dev->trans == IBV_QPT_UD) { struct rdma_conn_param param ={ .qp_num = cm->event->id->qp->qp_num }; if (rdma_accept(cm->event->id, ¶m) != 0) error(0, "rdma_accept failed"); dev->qkey = cm->event->param.ud.qkey; dev->ah = ibv_create_ah(dev->pd, &cm->event->param.ud.ah_attr); if (!dev->ah) error(SYS, "failed to create address handle"); cm_ack_event(dev); } else error(BUG, "cm_open_server: bad transport: %d", dev->trans); } /* * Prepare a device using the Connection Manager. Final stage of open. */ static void cm_prep(DEVICE *dev) { } /* * Close a device using the Connection Manager. 
*/ static void cm_close(DEVICE *dev) { if (is_client()) if (rdma_disconnect(dev->cm.id) != 0) error(SYS, "rdma_disconnect failed"); cm_expect_event(dev, RDMA_CM_EVENT_DISCONNECTED); cm_ack_event(dev); rdma_destroy_qp(dev->cm.id); rdma_destroy_id(dev->cm.id); rdma_destroy_event_channel(dev->cm.channel); } /* * Get an event from the Connection Manager. If it is not what we expect, * complain. */ static void cm_expect_event(DEVICE *dev, int expected) { char msg1[64]; char msg2[64]; CMINFO *cm = &dev->cm; if (rdma_get_cm_event(cm->channel, &cm->event) != 0) error(0, "failed to receive event from RDMA CM channel"); if (cm->event->event != expected) { error(0, "unexpected event from RDMA CM: %s\n expecting: %s", cm_event_name(cm->event->event, msg1, sizeof(msg1)), cm_event_name(expected, msg2, sizeof(msg2))); } } /* * Return a name given a RDMA CM event number. We first look at our list. If * that fails, we call the standard rdma_event_str routine. */ static char * cm_event_name(int event, char *data, int size) { int i; for (i = 0; i < cardof(CMEvents); ++i) if (event == CMEvents[i].value) return CMEvents[i].name; strncpy(data, rdma_event_str(event), size); data[size-1] = '\0'; return data; } /* * Acknowledge and free a communication event. */ static void cm_ack_event(DEVICE *dev) { if (rdma_ack_cm_event(dev->cm.event) != 0) error(0, "rdma_ack_cm_event failed"); } /* * Open an InfiniBand device. 
*/ static void ib_open(DEVICE *dev) { /* Determine MTU */ { int mtu = Req.mtu_size; if (mtu == 256) dev->ib.mtu = IBV_MTU_256; else if (mtu == 512) dev->ib.mtu = IBV_MTU_512; else if (mtu == 1024) dev->ib.mtu = IBV_MTU_1024; else if (mtu == 2048) dev->ib.mtu = IBV_MTU_2048; else if (mtu == 4096) dev->ib.mtu = IBV_MTU_4096; else error(0, "bad MTU: %d; must be 256/512/1K/2K/4K", mtu); } /* Determine port */ { int port = 1; char *p = index(Req.id, ':'); if (p) { *p++ = '\0'; port = atoi(p); if (port < 1) error(0, "bad IB port: %d; must be at least 1", port); } dev->ib.port = port; } /* Determine static rate */ { RATES *q = Rates; RATES *r = q + cardof(Rates); for (;; ++q) { if (q >= r) error(SYS, "bad static rate: %s", Req.static_rate); if (streq(Req.static_rate, q->name)) { dev->ib.rate = q->rate; break; } } } /* Set up Q Key */ dev->qkey = QKEY; /* Open device */ { struct ibv_device *device; char *name = Req.id[0] ? Req.id : 0; dev->ib.devlist = ibv_get_device_list(0); if (!dev->ib.devlist) error(SYS, "failed to find any InfiniBand devices"); if (!name) device = *dev->ib.devlist; else { struct ibv_device **d = dev->ib.devlist; while ((device = *d++)) if (streq(ibv_get_device_name(device), name)) break; } if (!device) error(SYS, "failed to find InfiniBand device"); dev->ib.context = ibv_open_device(device); if (!dev->ib.context) { const char *s = ibv_get_device_name(device); error(SYS, "failed to open device %s", s); } } /* Set up local node LID */ { struct ibv_port_attr port_attr; int stat = ibv_query_port(dev->ib.context, dev->ib.port, &port_attr); if (stat != 0) error(SYS, "query port failed"); srand48(getpid()*time(0)); dev->lnode.lid = port_attr.lid; if (port_attr.lmc > 0) dev->lnode.lid += Req.src_path_bits & ((1 << port_attr.lmc) - 1); } /* Create QP */ rd_create_qp(dev, dev->ib.context, 0); /* Modify queue pair to INIT state */ { struct ibv_qp_attr attr ={ .qp_state = IBV_QPS_INIT, .pkey_index = 0, .port_num = dev->ib.port }; int flags = IBV_QP_STATE | 
IBV_QP_PKEY_INDEX | IBV_QP_PORT; if (dev->trans == IBV_QPT_UD) { flags |= IBV_QP_QKEY; attr.qkey = dev->qkey; #ifdef HAS_XRC } else if (dev->trans == IBV_QPT_RC || dev->trans == IBV_QPT_XRC) { #else } else if (dev->trans == IBV_QPT_RC) { #endif flags |= IBV_QP_ACCESS_FLAGS; attr.qp_access_flags = IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC; } else if (dev->trans == IBV_QPT_UC) { flags |= IBV_QP_ACCESS_FLAGS; attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE; } if (ibv_modify_qp(dev->qp, &attr, flags) != SUCCESS0) error(SYS, "failed to modify QP to INIT state"); } /* Set up local node QP number, PSN and SRQ number */ dev->lnode.qpn = dev->qp->qp_num; dev->lnode.psn = lrand48() & 0xffffff; #ifdef HAS_XRC if (dev->trans == IBV_QPT_XRC) dev->lnode.srqn = dev->srq->xrc_srq_num; #endif /* Set up alternate port LID */ if (Req.alt_port) { struct ibv_port_attr port_attr; int stat = ibv_query_port(dev->ib.context, Req.alt_port, &port_attr); if (stat != SUCCESS0) error(SYS, "query port failed"); dev->lnode.alt_lid = port_attr.lid; if (port_attr.lmc > 0) dev->lnode.alt_lid += Req.src_path_bits & ((1 << port_attr.lmc) - 1); } } /* * Prepare the InfiniBand device for receiving and sending. Final stage of * open. 
*/ static void ib_prep(DEVICE *dev) { int flags; struct ibv_qp_attr rtr_attr ={ .qp_state = IBV_QPS_RTR, .path_mtu = dev->ib.mtu, .dest_qp_num = dev->rnode.qpn, .rq_psn = dev->rnode.psn, .min_rnr_timer = MIN_RNR_TIMER, .max_dest_rd_atomic = dev->lnode.rd_atomic, .ah_attr = { .dlid = dev->rnode.lid, .port_num = dev->ib.port, .static_rate = dev->ib.rate, .src_path_bits = Req.src_path_bits, .sl = Req.sl } }; struct ibv_qp_attr rts_attr ={ .qp_state = IBV_QPS_RTS, .timeout = LOCAL_ACK_TIMEOUT, .retry_cnt = RETRY_CNT, .rnr_retry = RNR_RETRY_CNT, .sq_psn = dev->lnode.psn, .max_rd_atomic = dev->rnode.rd_atomic, .path_mig_state = IBV_MIG_REARM, .alt_port_num = Req.alt_port, .alt_ah_attr = { .dlid = dev->rnode.alt_lid, .port_num = Req.alt_port, .static_rate = dev->ib.rate, .src_path_bits = Req.src_path_bits, .sl = Req.sl } }; struct ibv_ah_attr ah_attr ={ .dlid = dev->rnode.lid, .port_num = dev->ib.port, .static_rate = dev->ib.rate, .src_path_bits = Req.src_path_bits, .sl = Req.sl }; if (dev->trans == IBV_QPT_UD) { /* Modify queue pair to RTR */ flags = IBV_QP_STATE; if (ibv_modify_qp(dev->qp, &rtr_attr, flags) != 0) error(SYS, "failed to modify QP to RTR"); /* Modify queue pair to RTS */ flags = IBV_QP_STATE | IBV_QP_SQ_PSN; if (ibv_modify_qp(dev->qp, &rts_attr, flags) != 0) error(SYS, "failed to modify QP to RTS"); /* Create address handle */ dev->ah = ibv_create_ah(dev->pd, &ah_attr); if (!dev->ah) error(SYS, "failed to create address handle"); #ifdef HAS_XRC } else if (dev->trans == IBV_QPT_RC || dev->trans == IBV_QPT_XRC) { #else } else if (dev->trans == IBV_QPT_RC) { #endif /* Modify queue pair to RTR */ flags = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER; if (ibv_modify_qp(dev->qp, &rtr_attr, flags) != 0) error(SYS, "failed to modify QP to RTR"); /* Modify queue pair to RTS */ flags = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | 
IBV_QP_MAX_QP_RD_ATOMIC; if (dev->trans == IBV_QPT_RC && dev->rnode.alt_lid) flags |= IBV_QP_ALT_PATH | IBV_QP_PATH_MIG_STATE; if (ibv_modify_qp(dev->qp, &rts_attr, flags) != 0) error(SYS, "failed to modify QP to RTS"); } else if (dev->trans == IBV_QPT_UC) { /* Modify queue pair to RTR */ flags = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN; if (ibv_modify_qp(dev->qp, &rtr_attr, flags) != 0) error(SYS, "failed to modify QP to RTR"); /* Modify queue pair to RTS */ flags = IBV_QP_STATE | IBV_QP_SQ_PSN; if (dev->rnode.alt_lid) flags |= IBV_QP_ALT_PATH | IBV_QP_PATH_MIG_STATE; if (ibv_modify_qp(dev->qp, &rts_attr, flags) != 0) error(SYS, "failed to modify QP to RTS"); } } /* * Close an InfiniBand device, part 1. */ static void ib_close1(DEVICE *dev) { if (dev->qp) ibv_destroy_qp(dev->qp); if (dev->srq) ibv_destroy_srq(dev->srq); #ifdef HAS_XRC if (dev->xrc) ibv_close_xrc_domain(dev->xrc); #endif } /* * Close an InfiniBand device, part 2. */ static void ib_close2(DEVICE *dev) { if (dev->ib.context) ibv_close_device(dev->ib.context); if (dev->ib.devlist) free(dev->ib.devlist); } /* * Cause a path migration to happen. */ static void ib_migrate(DEVICE *dev) { if (!Req.alt_port) return; /* Only migrate once. */ Req.alt_port = 0; if (dev->trans != IBV_QPT_RC && dev->trans != IBV_QPT_UC) return; { struct ibv_qp_attr attr ={ .path_mig_state = IBV_MIG_MIGRATED, }; if (ibv_modify_qp(dev->qp, &attr, IBV_QP_PATH_MIG_STATE) != SUCCESS0) error(SYS, "failed to modify QP to Migrated state"); } } /* * Post an atomic. 
*/
static void
ib_post_atomic(DEVICE *dev, ATOMIC atomic, int wrid,
               int offset, uint64_t compare_add, uint64_t swap)
{
    /* The 8-byte result lands in the local buffer at offset. */
    struct ibv_sge seg ={
        .addr   = (uintptr_t)dev->buffer + offset,
        .length = sizeof(uint64_t),
        .lkey   = dev->mr->lkey
    };
    struct ibv_send_wr req ={
        .wr_id      = wrid,
        .sg_list    = &seg,
        .num_sge    = 1,
        .send_flags = IBV_SEND_SIGNALED,
        .wr = {
            .atomic = {
                .remote_addr = dev->rnode.vaddr,
                .rkey        = dev->rnode.rkey,
            }
        }
    };
    struct ibv_send_wr *bad;

    if (atomic == COMPARE_SWAP) {
        req.opcode = IBV_WR_ATOMIC_CMP_AND_SWP;
        req.wr.atomic.compare_add = compare_add;
        req.wr.atomic.swap = swap;
    } else if (atomic == FETCH_ADD) {
        req.opcode = IBV_WR_ATOMIC_FETCH_AND_ADD;
        req.wr.atomic.compare_add = compare_add;
    }

    errno = 0;
    if (ibv_post_send(dev->qp, &req, &bad) != SUCCESS0) {
        /* A failure caused by the end-of-test alarm is not an error. */
        if (Finished && errno == EINTR)
            return;
        if (atomic == COMPARE_SWAP)
            error(SYS, "failed to post compare and swap");
        else if (atomic == FETCH_ADD)
            error(SYS, "failed to post fetch and add");
        else
            error(BUG, "bad atomic: %d", atomic);
    }

    LStat.s.no_bytes += sizeof(uint64_t);
    LStat.s.no_msgs++;
}


/*
 * The standard version to post sends that most of the test routines call.
 * Post n sends of dev->msg_size bytes from the start of the buffer, with no
 * per-send increment and with statistics enabled.
 */
static void
rd_post_send_std(DEVICE *dev, int n)
{
    rd_post_send(dev, 0, dev->msg_size, 0, n, 1);
}


/*
 * Post one or more sends.
*/ static void rd_post_send(DEVICE *dev, int off, int len, int inc, int rep, int stat) { struct ibv_sge sge ={ .addr = (uintptr_t) &dev->buffer[off], .length = len, .lkey = dev->mr->lkey }; struct ibv_send_wr wr ={ .wr_id = WRID_SEND, .sg_list = &sge, .num_sge = 1, .opcode = IBV_WR_SEND, .send_flags = IBV_SEND_SIGNALED, }; struct ibv_send_wr *badwr; if (dev->trans == IBV_QPT_UD) { wr.wr.ud.ah = dev->ah; wr.wr.ud.remote_qpn = dev->rnode.qpn; wr.wr.ud.remote_qkey = dev->qkey; } #ifdef HAS_XRC else if (dev->trans == IBV_QPT_XRC) wr.xrc_remote_srq_num = dev->rnode.srqn; #endif if (dev->msg_size <= dev->max_inline) wr.send_flags |= IBV_SEND_INLINE; errno = 0; while (!Finished && rep-- > 0) { if (ibv_post_send(dev->qp, &wr, &badwr) != SUCCESS0) { if (Finished && errno == EINTR) return; error(SYS, "failed to post send"); } sge.addr += inc; sge.length += inc; if (stat) { LStat.s.no_bytes += dev->msg_size; LStat.s.no_msgs++; } } } /* * Post n receives. */ static void rd_post_recv_std(DEVICE *dev, int n) { struct ibv_sge sge ={ .addr = (uintptr_t) dev->buffer, .length = dev->buf_size, .lkey = dev->mr->lkey }; struct ibv_recv_wr wr ={ .wr_id = WRID_RECV, .sg_list = &sge, .num_sge = 1, }; struct ibv_recv_wr *badwr; errno = 0; while (!Finished && n-- > 0) { int stat; if (dev->srq) stat = ibv_post_srq_recv(dev->srq, &wr, &badwr); else stat = ibv_post_recv(dev->qp, &wr, &badwr); if (stat != SUCCESS0) { if (Finished && errno == EINTR) return; error(SYS, "failed to post receive"); } } } /* * Post n RDMA requests. 
*/ static void rd_post_rdma_std(DEVICE *dev, ibv_op opcode, int n) { struct ibv_sge sge ={ .addr = (uintptr_t) dev->buffer, .length = dev->msg_size, .lkey = dev->mr->lkey }; struct ibv_send_wr wr ={ .wr_id = WRID_RDMA, .sg_list = &sge, .num_sge = 1, .opcode = opcode, .send_flags = IBV_SEND_SIGNALED, .wr = { .rdma = { .remote_addr = dev->rnode.vaddr, .rkey = dev->rnode.rkey } } }; struct ibv_send_wr *badwr; if (opcode != IBV_WR_RDMA_READ && dev->msg_size <= dev->max_inline) wr.send_flags |= IBV_SEND_INLINE; errno = 0; while (!Finished && n--) { if (ibv_post_send(dev->qp, &wr, &badwr) != SUCCESS0) { if (Finished && errno == EINTR) return; error(SYS, "failed to post %s", opcode_name(wr.opcode)); } if (opcode != IBV_WR_RDMA_READ) { LStat.s.no_bytes += dev->msg_size; LStat.s.no_msgs++; } } } /* * Poll the completion queue. */ static int rd_poll(DEVICE *dev, struct ibv_wc *wc, int nwc) { int n; if (!Req.poll_mode && !Finished) { void *ectx; struct ibv_cq *ecq; if (ibv_get_cq_event(dev->channel, &ecq, &ectx) != SUCCESS0) return maybe(0, "failed to get CQ event"); if (ecq != dev->cq) error(0, "CQ event for unknown CQ"); if (ibv_req_notify_cq(dev->cq, 0) != SUCCESS0) return maybe(0, "failed to request CQ notification"); ibv_ack_cq_events(dev->cq, 1); } n = ibv_poll_cq(dev->cq, nwc, wc); if (n < 0) return maybe(0, "CQ poll failed"); return n; } /* * We encountered an error in a system call which might simply have been * interrupted by the alarm that signaled completion of the test. Generate the * error if appropriate or return the requested value. Final return is just to * silence the compiler. */ static int maybe(int val, char *msg) { if (Finished && errno == EINTR) return val; error(SYS, msg); return 0; } /* * Encode a NODE structure into a data stream. 
*/ static void enc_node(NODE *host) { enc_int(host->vaddr, sizeof(host->vaddr)); enc_int(host->lid, sizeof(host->lid)); enc_int(host->qpn, sizeof(host->qpn)); enc_int(host->psn, sizeof(host->psn)); enc_int(host->srqn, sizeof(host->srqn)); enc_int(host->rkey, sizeof(host->rkey)); enc_int(host->alt_lid, sizeof(host->alt_lid)); enc_int(host->rd_atomic, sizeof(host->rd_atomic)); } /* * Decode a NODE structure from a data stream. */ static void dec_node(NODE *host) { host->vaddr = dec_int(sizeof(host->vaddr)); host->lid = dec_int(sizeof(host->lid)); host->qpn = dec_int(sizeof(host->qpn)); host->psn = dec_int(sizeof(host->psn)); host->srqn = dec_int(sizeof(host->srqn)); host->rkey = dec_int(sizeof(host->rkey)); host->alt_lid = dec_int(sizeof(host->alt_lid)); host->rd_atomic = dec_int(sizeof(host->rd_atomic)); } /* * Handle a CQ error and return true if it is recoverable. */ static void do_error(int status, uint64_t *errors) { ++*errors; cq_error(status); } /* * Print out a CQ error given a status. */ static void cq_error(int status) { int i; for (i = 0; i < cardof(CQErrors); ++i) if (CQErrors[i].value == status) error(0, "%s failed: %s", TestName, CQErrors[i].name); error(0, "%s failed: CQ error %d", TestName, status); } /* * Return the name of an opcode. */ static char * opcode_name(int opcode) { int i; for (i = 0; i < cardof(Opcodes); ++i) if (Opcodes[i].value == opcode) return Opcodes[i].name; return "unknown operation"; } qperf-0.4.10/src/rds.c000066400000000000000000000310471313370502100144340ustar00rootroot00000000000000/* * qperf - handle RDS tests. * * Copyright (c) 2012 Intel Corporation. All rights reserved. * Copyright (c) 2002-2009 Johann George. All rights reserved. * Copyright (c) 2006-2009 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include "qperf.h" /* * Parameters. */ #ifndef AF_RDS /* * Before RDS was part of the Linux kernel, it was a module that picked * an address famiy value and exported it via /proc. Use the /proc * value, if present, otherwise fall back to the actual value. */ #define DEFAULT_AF_RDS 21 /* actual value for AF_RDS */ static int get_af_rds(void) { static int af_rds = -1; if (af_rds == -1) { FILE *fp; int read_fail = 0; if ((fp = fopen("/proc/sys/net/rds/pf_rds", "r")) != NULL) { int n, val; n = fscanf(fp, "%d", &val); fclose(fp); if ((n == 1)) { /* success */ af_rds = val; } else { read_fail = 1; goto read_fail; } } else { read_fail: error(RET, "AF_RDS not defined. 
Unable to %s " "/proc/sys/net/rds/pf_rds. Using %d.\n", read_fail ? "read":"open", DEFAULT_AF_RDS); af_rds = DEFAULT_AF_RDS; } } return af_rds; } #define AF_RDS get_af_rds() #endif /* * Function prototypes. */ static void client_get_hosts(char *lhost, char *rhost); static void connect_tcp(char *server, char *port, SS *addr, socklen_t *len, int *fd); static void get_socket_ip(SA *saptr, int salen, char *ip, int n); static int get_socket_port(int fd); static int init(void); static void qgetnameinfo(SA *sa, socklen_t salen, char *host, size_t hostlen, char *serv, size_t servlen, int flags); static int rds_socket(char *host, int port); static void rds_makeaddr(SS *addr, socklen_t *len, char *host, int port); static void set_parameters(long msgSize); static void server_get_hosts(char *lhost, char *rhost); static void set_socket_buffer_size(int fd); /* * Static variables. */ static SS RAddr; static socklen_t RLen; /* * Measure RDS bandwidth (client side). */ void run_client_rds_bw(void) { char *buf; int sockfd; par_use(L_ACCESS_RECV); par_use(R_ACCESS_RECV); set_parameters(8*1024); client_send_request(); sockfd = init(); buf = qmalloc(Req.msg_size); sync_test(); while (!Finished) { int n = sendto(sockfd, buf, Req.msg_size, 0, (SA *)&RAddr, RLen); if (Finished) break; if (n != Req.msg_size) { LStat.s.no_errs++; continue; } LStat.s.no_bytes += n; LStat.s.no_msgs++; } stop_test_timer(); exchange_results(); free(buf); close(sockfd); show_results(BANDWIDTH); } /* * Measure RDS bandwidth (server side). */ void run_server_rds_bw(void) { char *buf; int sockfd; sockfd = init(); sync_test(); buf = qmalloc(Req.msg_size); while (!Finished) { int n = read(sockfd, buf, Req.msg_size); if (Finished) break; if (n != Req.msg_size) { LStat.r.no_errs++; continue; } LStat.r.no_bytes += n; LStat.r.no_msgs++; if (Req.access_recv) touch_data(buf, Req.msg_size); } stop_test_timer(); exchange_results(); free(buf); close(sockfd); } /* * Measure RDS latency (client side). 
*/ void run_client_rds_lat(void) { char *buf; int sockfd; set_parameters(1); client_send_request(); sockfd = init(); buf = qmalloc(Req.msg_size); sync_test(); while (!Finished) { int n = sendto(sockfd, buf, Req.msg_size, 0, (SA *)&RAddr, RLen); if (Finished) break; if (n != Req.msg_size) { LStat.s.no_errs++; continue; } LStat.s.no_bytes += n; LStat.s.no_msgs++; n = read(sockfd, buf, Req.msg_size); if (Finished) break; if (n != Req.msg_size) { LStat.r.no_errs++; continue; } LStat.r.no_bytes += n; LStat.r.no_msgs++; } stop_test_timer(); exchange_results(); free(buf); close(sockfd); show_results(LATENCY); } /* * Measure RDS latency (server side). */ void run_server_rds_lat(void) { char *buf; int sockfd; sockfd = init(); sync_test(); buf = qmalloc(Req.msg_size); while (!Finished) { SS raddr; socklen_t rlen = sizeof(raddr); int n = recvfrom(sockfd, buf, Req.msg_size, 0, (SA *)&raddr, &rlen); if (Finished) break; if (n != Req.msg_size) { LStat.r.no_errs++; continue; } LStat.r.no_bytes += n; LStat.r.no_msgs++; n = sendto(sockfd, buf, Req.msg_size, 0, (SA *)&raddr, rlen); if (Finished) break; if (n != Req.msg_size) { LStat.s.no_errs++; continue; } LStat.s.no_bytes += n; LStat.s.no_msgs++; } stop_test_timer(); exchange_results(); free(buf); close(sockfd); } /* * Set default IP parameters and ensure that any that are set are being used. */ static void set_parameters(long msgSize) { setp_u32(0, L_MSG_SIZE, msgSize); setp_u32(0, R_MSG_SIZE, msgSize); par_use(L_PORT); par_use(R_PORT); par_use(L_SOCK_BUF_SIZE); par_use(R_SOCK_BUF_SIZE); opt_check(); } /* * Initialize and return open socket. 
*/ static int init(void) { int sockfd; uint32_t lport; uint32_t rport; char lhost[NI_MAXHOST]; char rhost[NI_MAXHOST]; if (is_client()) client_get_hosts(lhost, rhost); else server_get_hosts(lhost, rhost); sockfd = rds_socket(lhost, Req.port); lport = get_socket_port(sockfd); encode_uint32(&lport, lport); send_mesg(&lport, sizeof(lport), "RDS port"); recv_mesg(&rport, sizeof(rport), "RDS port"); rport = decode_uint32(&rport); rds_makeaddr(&RAddr, &RLen, rhost, rport); return sockfd; } /* * Have an exchange with the client over TCP/IP and get the IP of our local * host. */ static void server_get_hosts(char *lhost, char *rhost) { int fd, lfd; uint32_t port; struct sockaddr_in laddr, raddr; socklen_t rlen; lfd = socket(AF_INET, SOCK_STREAM, 0); if (lfd < 0) error(SYS, "socket failed"); setsockopt_one(lfd, SO_REUSEADDR); memset(&laddr, 0, sizeof(laddr)); laddr.sin_family = AF_INET; laddr.sin_addr.s_addr = INADDR_ANY; laddr.sin_port = htons(0); if (bind(lfd, (SA *)&laddr, sizeof(laddr)) < 0) error(SYS, "bind INET failed"); port = get_socket_port(lfd); encode_uint32(&port, port); send_mesg(&port, sizeof(port), "TCP IPv4 server port"); if (listen(lfd, 1) < 0) error(SYS, "listen failed"); rlen = sizeof(raddr); fd = accept(lfd, (SA *)&raddr, &rlen); if (fd < 0) error(SYS, "accept failed"); close(lfd); get_socket_ip((SA *)&raddr, rlen, rhost, NI_MAXHOST); send_mesg(rhost, NI_MAXHOST, "client IP"); recv_mesg(lhost, NI_MAXHOST, "server IP"); close(fd); } /* * Have an exchange with the server over TCP/IP and get the IPs of our local * and the remote host. 
*/ static void client_get_hosts(char *lhost, char *rhost) { SS raddr; socklen_t rlen; char *service; uint32_t port; int fd = -1; recv_mesg(&port, sizeof(port), "TCP IPv4 server port"); port = decode_uint32(&port); service = qasprintf("%d", port); connect_tcp(ServerName, service, &raddr, &rlen, &fd); free(service); get_socket_ip((SA *)&raddr, rlen, rhost, NI_MAXHOST); send_mesg(rhost, NI_MAXHOST, "server IP"); recv_mesg(lhost, NI_MAXHOST, "client IP"); close(fd); } /* * Make a RDS socket. */ static int rds_socket(char *host, int port) { int sockfd; SS sockaddr; socklen_t socklen; sockfd = socket(AF_RDS, SOCK_SEQPACKET, 0); if (sockfd < 0) error(SYS, "socket failed"); setsockopt_one(sockfd, SO_REUSEADDR); rds_makeaddr(&sockaddr, &socklen, host, port); if (bind(sockfd, (SA *)&sockaddr, socklen) != SUCCESS0) error(SYS, "bind RDS failed"); set_socket_buffer_size(sockfd); return sockfd; } /* * Make a RDS address. */ static void rds_makeaddr(SS *addr, socklen_t *len, char *host, int port) { struct sockaddr_in *sap = (struct sockaddr_in *)addr; memset(sap, 0, sizeof(*sap)); sap->sin_family = AF_INET; inet_pton(AF_INET, host, &sap->sin_addr.s_addr); sap->sin_port = htons(port); *len = sizeof(struct sockaddr_in); } /* * Connect over TCP/IP to the server/port and return the socket structure, its * length and the open socket file descriptor. 
*/ static void connect_tcp(char *server, char *port, SS *addr, socklen_t *len, int *fd) { int stat; struct addrinfo *aip, *ailist; struct addrinfo hints ={ .ai_flags = AI_NUMERICSERV, .ai_family = AF_INET, .ai_socktype = SOCK_STREAM }; stat = getaddrinfo(server, port, &hints, &ailist); if (stat != 0) error(0, "getaddrinfo failed: %s", gai_strerror(stat)); for (aip = ailist; aip; aip = aip->ai_next) { if (fd) { *fd = socket(aip->ai_family, aip->ai_socktype, aip->ai_protocol); if (*fd < 0) error(SYS, "socket failed"); if (connect(*fd, aip->ai_addr, aip->ai_addrlen) < 0) error(SYS, "connect failed"); break; } break; } if (!aip) error(0, "connect_tcp failed"); memcpy(addr, aip->ai_addr, aip->ai_addrlen); *len = aip->ai_addrlen; freeaddrinfo(ailist); } /* * Given an open socket, return the port associated with it. There must be a * more efficient way to do this that is portable. */ static int get_socket_port(int fd) { int port; char p[NI_MAXSERV]; SS sa; socklen_t salen = sizeof(sa); if (getsockname(fd, (SA *)&sa, &salen) < 0) error(SYS, "getsockname failed"); qgetnameinfo((SA *)&sa, salen, 0, 0, p, sizeof(p), NI_NUMERICSERV); port = atoi(p); if (!port) error(SYS, "invalid port"); return port; } /* * Given a socket, return its IP address. */ static void get_socket_ip(SA *saptr, int salen, char *ip, int n) { qgetnameinfo(saptr, salen, ip, n, 0, 0, NI_NUMERICHOST); } /* * Call getnameinfo and exit with an error on failure. */ static void qgetnameinfo(SA *sa, socklen_t salen, char *host, size_t hostlen, char *serv, size_t servlen, int flags) { int stat = getnameinfo(sa, salen, host, hostlen, serv, servlen, flags); if (stat < 0) error(0, "getnameinfo failed: %s", gai_strerror(stat)); } /* * Set both the send and receive socket buffer sizes. 
*/ static void set_socket_buffer_size(int fd) { int size = Req.sock_buf_size; if (!size) return; if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)) < 0) error(SYS, "failed to set send buffer size on socket"); if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)) < 0) error(SYS, "failed to set receive buffer size on socket"); } qperf-0.4.10/src/socket.c000066400000000000000000000415211313370502100151320ustar00rootroot00000000000000/* * qperf - handle socket tests. * * Copyright (c) 2002-2009 Johann George. All rights reserved. * Copyright (c) 2006-2009 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include "qperf.h" /* * Parameters. 
*/
#define AF_INET_SDP 27 /* Family for SDP */


/*
 * Kinds of socket tests.  The enumerators are declared in the same order as
 * the strings in Kinds below.
 */
typedef enum {
    K_SCTP,
    K_SDP,
    K_TCP,
    K_UDP,
} KIND;

char *Kinds[] ={
    "SCTP",
    "SDP",
    "TCP",
    "UDP",
};


/*
 * Function prototypes.
 */
static void  client_init(int *fd, KIND kind);
static void  datagram_client_bw(KIND kind);
static void  datagram_client_lat(KIND kind);
static void  datagram_server_bw(KIND kind);
static void  datagram_server_init(int *fd, KIND kind);
static void  datagram_server_lat(KIND kind);
static void  get_socket_port(int fd, uint32_t *port);
static AI   *getaddrinfo_kind(int serverflag, KIND kind, int port);
static void  ip_parameters(long msgSize);
static char *kind_name(KIND kind);
static int   recv_full(int fd, void *ptr, int len);
static int   send_full(int fd, void *ptr, int len);
static void  set_socket_buffer_size(int fd);
static void  stream_client_bw(KIND kind);
static void  stream_client_lat(KIND kind);
static void  stream_server_bw(KIND kind);
static void  stream_server_init(int *fd, KIND kind);
static void  stream_server_lat(KIND kind);


/*
 * Measure SCTP bandwidth (client side).  Default message size is 32 KB.
 */
void
run_client_sctp_bw(void)
{
    par_use(L_ACCESS_RECV);
    par_use(R_ACCESS_RECV);
    ip_parameters(32*1024);
    stream_client_bw(K_SCTP);
}


/*
 * Measure SCTP bandwidth (server side).
 */
void
run_server_sctp_bw(void)
{
    stream_server_bw(K_SCTP);
}


/*
 * Measure SCTP latency (client side).  Default message size is 1 byte.
 */
void
run_client_sctp_lat(void)
{
    ip_parameters(1);
    stream_client_lat(K_SCTP);
}


/*
 * Measure SCTP latency (server side).
 */
void
run_server_sctp_lat(void)
{
    stream_server_lat(K_SCTP);
}


/*
 * Measure SDP bandwidth (client side).  Default message size is 64 KB.
 */
void
run_client_sdp_bw(void)
{
    par_use(L_ACCESS_RECV);
    par_use(R_ACCESS_RECV);
    ip_parameters(64*1024);
    stream_client_bw(K_SDP);
}


/*
 * Measure SDP bandwidth (server side).
 */
void
run_server_sdp_bw(void)
{
    stream_server_bw(K_SDP);
}


/*
 * Measure SDP latency (client side).  Default message size is 1 byte.
 */
void
run_client_sdp_lat(void)
{
    ip_parameters(1);
    stream_client_lat(K_SDP);
}


/*
 * Measure SDP latency (server side).
*/ void run_server_sdp_lat(void) { stream_server_lat(K_SDP); } /* * Measure TCP bandwidth (client side). */ void run_client_tcp_bw(void) { par_use(L_ACCESS_RECV); par_use(R_ACCESS_RECV); ip_parameters(64*1024); stream_client_bw(K_TCP); } /* * Measure TCP bandwidth (server side). */ void run_server_tcp_bw(void) { stream_server_bw(K_TCP); } /* * Measure TCP latency (client side). */ void run_client_tcp_lat(void) { ip_parameters(1); stream_client_lat(K_TCP); } /* * Measure TCP latency (server side). */ void run_server_tcp_lat(void) { stream_server_lat(K_TCP); } /* * Measure UDP bandwidth (client side). */ void run_client_udp_bw(void) { par_use(L_ACCESS_RECV); par_use(R_ACCESS_RECV); ip_parameters(32*1024); datagram_client_bw(K_UDP); } /* * Measure UDP bandwidth (server side). */ void run_server_udp_bw(void) { datagram_server_bw(K_UDP); } /* * Measure UDP latency (client side). */ void run_client_udp_lat(void) { ip_parameters(1); datagram_client_lat(K_UDP); } /* * Measure UDP latency (server side). */ void run_server_udp_lat(void) { datagram_server_lat(K_UDP); } /* * Measure stream bandwidth (client side). */ static void stream_client_bw(KIND kind) { char *buf; int sockFD; client_init(&sockFD, kind); buf = qmalloc(Req.msg_size); sync_test(); while (!Finished) { int n = send_full(sockFD, buf, Req.msg_size); if (Finished) break; if (n < 0) { LStat.s.no_errs++; continue; } LStat.s.no_bytes += n; LStat.s.no_msgs++; } stop_test_timer(); exchange_results(); free(buf); close(sockFD); show_results(BANDWIDTH); } /* * Measure stream bandwidth (server side). 
*/
static void
stream_server_bw(KIND kind)
{
    char *msg = 0;
    int fd = -1;

    stream_server_init(&fd, kind);
    sync_test();
    msg = qmalloc(Req.msg_size);
    for (;;) {
        int got;

        if (Finished)
            break;
        got = recv_full(fd, msg, Req.msg_size);
        /* A receive cut short by the end of the test is not counted. */
        if (Finished)
            break;
        if (got < 0) {
            LStat.r.no_errs++;
        } else {
            LStat.r.no_bytes += got;
            LStat.r.no_msgs++;
            if (Req.access_recv)
                touch_data(msg, Req.msg_size);
        }
    }
    stop_test_timer();
    exchange_results();
    free(msg);
    if (fd >= 0)
        close(fd);
}


/*
 * Client half of the stream latency test.  Each round sends one message and
 * waits for it to come back; a failed send skips the receive for that round.
 * Send and receive statistics are kept separately in LStat.s and LStat.r.
 */
static void
stream_client_lat(KIND kind)
{
    int fd;
    char *msg;

    client_init(&fd, kind);
    msg = qmalloc(Req.msg_size);
    sync_test();
    for (;;) {
        int count;

        if (Finished)
            break;

        /* Outbound leg. */
        count = send_full(fd, msg, Req.msg_size);
        if (Finished)
            break;
        if (count < 0) {
            LStat.s.no_errs++;
            continue;
        }
        LStat.s.no_bytes += count;
        LStat.s.no_msgs++;

        /* Return leg. */
        count = recv_full(fd, msg, Req.msg_size);
        if (Finished)
            break;
        if (count < 0) {
            LStat.r.no_errs++;
            continue;
        }
        LStat.r.no_bytes += count;
        LStat.r.no_msgs++;
    }
    stop_test_timer();
    exchange_results();
    free(msg);
    close(fd);
    show_results(LATENCY);
}


/*
 * Server half of the stream latency test.  Each round receives one message
 * and sends it straight back; a failed receive skips the send for that round.
 */
static void
stream_server_lat(KIND kind)
{
    char *msg = 0;
    int fd = -1;

    stream_server_init(&fd, kind);
    sync_test();
    msg = qmalloc(Req.msg_size);
    for (;;) {
        int count;

        if (Finished)
            break;

        /* Inbound leg. */
        count = recv_full(fd, msg, Req.msg_size);
        if (Finished)
            break;
        if (count < 0) {
            LStat.r.no_errs++;
            continue;
        }
        LStat.r.no_bytes += count;
        LStat.r.no_msgs++;

        /* Echo it back. */
        count = send_full(fd, msg, Req.msg_size);
        if (Finished)
            break;
        if (count < 0) {
            LStat.s.no_errs++;
            continue;
        }
        LStat.s.no_bytes += count;
        LStat.s.no_msgs++;
    }
    stop_test_timer();
    exchange_results();
    free(msg);
    close(fd);
}


/*
 * Measure datagram bandwidth (client side).
*/
static void
datagram_client_bw(KIND kind)
{
    int fd;
    char *msg;

    client_init(&fd, kind);
    msg = qmalloc(Req.msg_size);
    sync_test();
    for (;;) {
        int sent;

        if (Finished)
            break;
        sent = write(fd, msg, Req.msg_size);
        /* A write that raced with the end of the test is not counted. */
        if (Finished)
            break;
        if (sent >= 0) {
            LStat.s.no_bytes += sent;
            LStat.s.no_msgs++;
        } else {
            LStat.s.no_errs++;
        }
    }
    stop_test_timer();
    exchange_results();
    free(msg);
    close(fd);
    show_results(BANDWIDTH_SR);
}


/*
 * Server half of the datagram bandwidth test.  Receives datagrams until
 * Finished is set, counting them in LStat.r, and optionally touches the
 * receive buffer when Req.access_recv is set.
 */
static void
datagram_server_bw(KIND kind)
{
    char *msg = 0;
    int fd;

    datagram_server_init(&fd, kind);
    sync_test();
    msg = qmalloc(Req.msg_size);
    for (;;) {
        int got;

        if (Finished)
            break;
        got = recv(fd, msg, Req.msg_size, 0);
        if (Finished)
            break;
        if (got < 0) {
            LStat.r.no_errs++;
        } else {
            LStat.r.no_bytes += got;
            LStat.r.no_msgs++;
            if (Req.access_recv)
                touch_data(msg, Req.msg_size);
        }
    }
    stop_test_timer();
    exchange_results();
    free(msg);
    close(fd);
}


/*
 * Client half of the datagram latency test.  Each round writes one datagram
 * and reads the reply; a failed write skips the read for that round.
 */
static void
datagram_client_lat(KIND kind)
{
    int fd;
    char *msg;

    client_init(&fd, kind);
    msg = qmalloc(Req.msg_size);
    sync_test();
    for (;;) {
        int count;

        if (Finished)
            break;

        /* Outbound datagram. */
        count = write(fd, msg, Req.msg_size);
        if (Finished)
            break;
        if (count < 0) {
            LStat.s.no_errs++;
            continue;
        }
        LStat.s.no_bytes += count;
        LStat.s.no_msgs++;

        /* Reply. */
        count = read(fd, msg, Req.msg_size);
        if (Finished)
            break;
        if (count < 0) {
            LStat.r.no_errs++;
            continue;
        }
        LStat.r.no_bytes += count;
        LStat.r.no_msgs++;
    }
    stop_test_timer();
    exchange_results();
    free(msg);
    close(fd);
    show_results(LATENCY);
}


/*
 * Measure datagram latency (server side).
*/ static void datagram_server_lat(KIND kind) { int sockfd; char *buf = 0; datagram_server_init(&sockfd, kind); sync_test(); buf = qmalloc(Req.msg_size); while (!Finished) { SS clientAddr; socklen_t clientLen = sizeof(clientAddr); int n = recvfrom(sockfd, buf, Req.msg_size, 0, (SA *)&clientAddr, &clientLen); if (Finished) break; if (n < 0) { LStat.r.no_errs++; continue; } LStat.r.no_bytes += n; LStat.r.no_msgs++; n = sendto(sockfd, buf, Req.msg_size, 0, (SA *)&clientAddr, clientLen); if (Finished) break; if (n < 0) { LStat.s.no_errs++; continue; } LStat.s.no_bytes += n; LStat.s.no_msgs++; } stop_test_timer(); exchange_results(); free(buf); close(sockfd); } /* * Set default IP parameters and ensure that any that are set are being used. */ static void ip_parameters(long msgSize) { setp_u32(0, L_MSG_SIZE, msgSize); setp_u32(0, R_MSG_SIZE, msgSize); par_use(L_PORT); par_use(R_PORT); par_use(L_SOCK_BUF_SIZE); par_use(R_SOCK_BUF_SIZE); opt_check(); } /* * Socket client initialization. */ static void client_init(int *fd, KIND kind) { uint32_t rport; AI *ai, *ailist; client_send_request(); recv_mesg(&rport, sizeof(rport), "port"); rport = decode_uint32(&rport); ailist = getaddrinfo_kind(0, kind, rport); for (ai = ailist; ai; ai = ai->ai_next) { if (!ai->ai_family) continue; *fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); setsockopt_one(*fd, SO_REUSEADDR); if (connect(*fd, ai->ai_addr, ai->ai_addrlen) == SUCCESS0) break; close(*fd); } freeaddrinfo(ailist); if (!ai) error(0, "could not make %s connection to server", kind_name(kind)); if (Debug) { uint32_t lport; get_socket_port(*fd, &lport); debug("sending from %s port %d to %d", kind_name(kind), lport, rport); } } /* * Socket server initialization. 
*/
static void
stream_server_init(int *fd, KIND kind)
{
    uint32_t port;
    uint32_t wirePort;
    AI *ai;
    int bound = 0;
    int listenFD = -1;
    AI *ailist = getaddrinfo_kind(1, kind, Req.port);

    /* Use the first usable address we can create a socket for and bind. */
    for (ai = ailist; ai; ai = ai->ai_next) {
        if (!ai->ai_family)
            continue;
        listenFD = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (listenFD < 0)
            continue;
        setsockopt_one(listenFD, SO_REUSEADDR);
        if (bind(listenFD, ai->ai_addr, ai->ai_addrlen) == SUCCESS0) {
            bound = 1;
            break;
        }
        close(listenFD);
        listenFD = -1;
    }

    /* Record success in a flag so the list is not inspected once freed. */
    freeaddrinfo(ailist);
    if (!bound)
        error(0, "unable to make %s socket", kind_name(kind));
    if (listen(listenFD, 1) < 0)
        error(SYS, "listen failed");

    /*
     * Tell the client which port we ended up on.  The encoded form is kept in
     * its own variable so that port still holds the host value for the debug
     * message below; encoding in place would print a byte-swapped port on
     * hosts whose byte order differs from the encoding.
     */
    get_socket_port(listenFD, &port);
    encode_uint32(&wirePort, port);
    send_mesg(&wirePort, sizeof(wirePort), "port");

    *fd = accept(listenFD, 0, 0);
    if (*fd < 0)
        error(SYS, "accept failed");
    debug("accepted %s connection", kind_name(kind));

    /*
     * NOTE(review): tcp(7) states that buffer sizes must be set before
     * listen() to take full effect on accepted connections; here they are
     * applied to the accepted socket only - confirm whether that is intended.
     */
    set_socket_buffer_size(*fd);
    close(listenFD);
    debug("receiving to %s port %d", kind_name(kind), (int)port);
}


/*
 * Datagram server initialization.  Binds a datagram socket to the first
 * usable address, applies the requested socket buffer size, sends the port
 * number to the client and returns the socket in *fd.  error() is called if
 * no socket could be bound.
 */
static void
datagram_server_init(int *fd, KIND kind)
{
    uint32_t port;
    uint32_t wirePort;
    AI *ai;
    int bound = 0;
    int sockfd = -1;
    AI *ailist = getaddrinfo_kind(1, kind, Req.port);

    for (ai = ailist; ai; ai = ai->ai_next) {
        if (!ai->ai_family)
            continue;
        sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (sockfd < 0)
            continue;
        setsockopt_one(sockfd, SO_REUSEADDR);
        if (bind(sockfd, ai->ai_addr, ai->ai_addrlen) == SUCCESS0) {
            bound = 1;
            break;
        }
        close(sockfd);
        sockfd = -1;
    }

    /* Record success in a flag so the list is not inspected once freed. */
    freeaddrinfo(ailist);
    if (!bound)
        error(0, "unable to make %s socket", kind_name(kind));

    set_socket_buffer_size(sockfd);
    get_socket_port(sockfd, &port);
    encode_uint32(&wirePort, port);
    send_mesg(&wirePort, sizeof(wirePort), "port");
    *fd = sockfd;
}


/*
 * A version of getaddrinfo that takes a numeric port and prints out an error
 * on failure.
*/ static AI * getaddrinfo_kind(int serverflag, KIND kind, int port) { AI *aip, *ailist; AI hints ={ .ai_flags = AI_NUMERICSERV, .ai_family = AF_UNSPEC, .ai_socktype = SOCK_STREAM }; if (serverflag) hints.ai_flags |= AI_PASSIVE; if (kind == K_UDP) hints.ai_socktype = SOCK_DGRAM; ailist = getaddrinfo_port(serverflag ? 0 : ServerName, port, &hints); for (aip = ailist; aip; aip = aip->ai_next) { if (kind == K_SDP) { if (aip->ai_family == AF_INET || aip->ai_family == AF_INET6) aip->ai_family = AF_INET_SDP; else aip->ai_family = 0; } else if (kind == K_SCTP) { if (aip->ai_protocol == IPPROTO_TCP) aip->ai_protocol = IPPROTO_SCTP; else aip->ai_family = 0; } } return ailist; } /* * Set both the send and receive socket buffer sizes. */ static void set_socket_buffer_size(int fd) { int size = Req.sock_buf_size; if (!size) return; if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)) < 0) error(SYS, "Failed to set send buffer size on socket"); if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)) < 0) error(SYS, "Failed to set receive buffer size on socket"); } /* * Given an open socket, return the port associated with it. There must be a * more efficient way to do this that is portable. */ static void get_socket_port(int fd, uint32_t *port) { char p[NI_MAXSERV]; SS sa; socklen_t salen = sizeof(sa); if (getsockname(fd, (SA *)&sa, &salen) < 0) error(SYS, "getsockname failed"); if (getnameinfo((SA *)&sa, salen, 0, 0, p, sizeof(p), NI_NUMERICSERV) < 0) error(SYS, "getnameinfo failed"); *port = atoi(p); if (!port) error(0, "invalid port"); } /* * Send a complete message to a socket. A zero byte write indicates an end of * file which suggests that we are finished. */ static int send_full(int fd, void *ptr, int len) { int n = len; while (!Finished && n) { int i = write(fd, ptr, n); if (i < 0) return i; ptr += i; n -= i; if (i == 0) set_finished(); } return len-n; } /* * Receive a complete message from a socket. 
A zero byte read indicates an end * of file which suggests that we are finished. */ static int recv_full(int fd, void *ptr, int len) { int n = len; while (!Finished && n) { int i = read(fd, ptr, n); if (i < 0) return i; ptr += i; n -= i; if (i == 0) set_finished(); } return len-n; } /* * Return the name of a transport kind. */ static char * kind_name(KIND kind) { if (kind < 0 || kind >= cardof(Kinds)) return "unknown type"; else return Kinds[kind]; } qperf-0.4.10/src/support.c000066400000000000000000000324361313370502100153630ustar00rootroot00000000000000/* * qperf - support routines. * Measure socket and RDMA performance. * * Copyright (c) 2002-2009 Johann George. All rights reserved. * Copyright (c) 2006-2009 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#define _GNU_SOURCE
/* NOTE(review): the header names of the next nine directives are missing. */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "qperf.h"


/*
 * Configurable parameters.
 */
#define ERROR_TIMEOUT   3               /* Error timeout in seconds */


/*
 * For convenience.  The signature of a signal handler installed with
 * SA_SIGINFO (see timeout_set).
 */
typedef void (SIGFUNC)(int signo, siginfo_t *siginfo, void *ucontext);


/*
 * Function prototypes.
 */
static void buf_app(char **pp, char *end, char *str);
static void buf_end(char **pp, char *end);
static double get_seconds(void);
static void remote_failure_error(void);
static char *remote_name(void);
static int send_recv_mesg(int sr, char *item, int fd, char *buf, int len);
static SIGFUNC sig_alrm_remote_failure;
static SIGFUNC sig_alrm_die;
static void timeout_set(int seconds, SIGFUNC sigfunc);
static void timeout_end(void);


/*
 * Static variables.  Cursors into the buffers currently being decoded from
 * and encoded into; set by dec_init/enc_init and advanced by the dec_ and
 * enc_ routines below.
 */
static uint8_t *DecodePtr;
static uint8_t *EncodePtr;


/*
 * Initialize encode pointer.  Subsequent enc_ calls write starting at p.
 */
void
enc_init(void *p)
{
    EncodePtr = p;
}


/*
 * Initialize decode pointer.  Subsequent dec_ calls read starting at p.
 */
void
dec_init(void *p)
{
    DecodePtr = p;
}


/*
 * Encode a string.  Copies exactly n bytes from s to the encode position and
 * advances it by n; no terminator is added.
 */
void
enc_str(char *s, int n)
{
    memcpy(EncodePtr, s, n);
    EncodePtr += n;
}


/*
 * Decode a string.  Copies exactly n bytes from the decode position to s and
 * advances it by n; no terminator is added.
 */
void
dec_str(char *s, int n)
{
    memcpy(s, DecodePtr, n);
    DecodePtr += n;
}


/*
 * Encode an integer.  Writes the low n bytes of l, least significant byte
 * first, and advances the encode position by n.
 */
void
enc_int(int64_t l, int n)
{
    while (n--) {
        *EncodePtr++ = l;
        l >>= 8;
    }
}


/*
 * Decode an integer.  Reads n bytes stored least significant byte first,
 * advances the decode position by n and returns the value.
 */
int64_t
dec_int(int n)
{
    uint64_t l = 0;
    uint8_t *p = (DecodePtr += n);      /* Start one past the last byte */

    /* Walk backwards so the last byte ends up most significant. */
    while (n--)
        l = (l << 8) | (*--p & 0xFF);
    return l;
}


/*
 * Encode a 32 bit unsigned integer.  Stores v at p in the byte order used by
 * enc_int.
 */
void
encode_uint32(uint32_t *p, uint32_t v)
{
    enc_init(p);
    enc_int(v, sizeof(v));
}


/*
 * Decode a 32 bit unsigned integer.
*/ uint32_t decode_uint32(uint32_t *p) { dec_init(p); return dec_int(sizeof(uint32_t)); } /* * Call malloc and exit with an error on failure. */ void * qmalloc(long n) { void *p = malloc(n); if (!p) error(0, "malloc failed"); return p; } /* * Attempt to print out a string allocating the necessary storage and exit with * an error on failure. */ char * qasprintf(char *fmt, ...) { int stat; char *str; va_list alist; va_start(alist, fmt); stat = vasprintf(&str, fmt, alist); va_end(alist); if (stat < 0) error(0, "out of space"); return str; } /* * Touch data. */ void touch_data(void *p, int n) { volatile uint64_t *p64 = p; while (n >= sizeof(*p64)) { (void) *p64++; n -= sizeof(*p64); } if (n) { volatile uint8_t *p8 = (uint8_t *)p64; while (n >= sizeof(*p8)) { (void) *p8++; n -= sizeof(*p8); } } } /* * Synchronize the client and server. */ void synchronize(char *msg) { send_sync(msg); recv_sync(msg); debug("synchronization complete"); } /* * Send a synchronize message. */ void send_sync(char *msg) { int n = strlen(msg); send_mesg(msg, n, msg); } /* * Receive a synchronize message. */ void recv_sync(char *msg) { char data[64]; int n = strlen(msg); if (n > sizeof(data)) error(BUG, "buffer in recv_sync() too small"); recv_mesg(data, n, msg); if (memcmp(data, msg, n) != 0) error(0, "synchronize %s failure: data does not match", msg); } /* * Send a message to the client. */ int send_mesg(void *ptr, int len, char *item) { if (item) debug("sending %s", item); return send_recv_mesg('s', item, RemoteFD, ptr, len); } /* * Receive a response from the server. */ int recv_mesg(void *ptr, int len, char *item) { if (item) debug("waiting for %s", item); return send_recv_mesg('r', item, RemoteFD, ptr, len); } /* * Send or receive a message to a file descriptor timing out after a certain * amount of time. 
*/
static int
send_recv_mesg(int sr, char *item, int fd, char *buf, int len)
{
    typedef ssize_t (IO)(int fd, void *buf, size_t count);
    fd_set readers;
    fd_set writers;
    int receiving = (sr == 'r');
    IO *xfer = receiving ? (IO *)read : (IO *)write;
    fd_set *watched = receiving ? &readers : &writers;
    char *verb = receiving ? "receive" : "send";
    int done = 0;
    double deadline = get_seconds() + Req.timeout;

    while (len) {
        struct timeval tv;
        double left;
        int whole;
        int got;

        errno = 0;
        left = deadline - get_seconds();
        if (left <= 0) {
            /* Without an item name the caller handles the short count. */
            if (!item)
                return done;
            error(0, "failed to %s %s: timed out", verb, item);
        }

        /* Wait a microsecond longer than what is left of the timeout. */
        left += 1.0 / (1000*1000);
        whole = left;
        tv.tv_sec = whole;
        tv.tv_usec = (left-whole) * 1000*1000;

        FD_ZERO(&readers);
        FD_ZERO(&writers);
        FD_SET(fd, watched);
        if (select(fd+1, &readers, &writers, 0, &tv) < 0)
            error(SYS, "failed to %s %s: select failed", verb, item);
        if (!FD_ISSET(fd, watched))
            continue;

        got = xfer(fd, buf, len);
        if (got <= 0) {
            if (!item)
                return done;
            if (got < 0) {
                error(SYS, "failed to %s %s", verb, item);
            } else {
                error(0, "failed to %s %s: %s not responding",
                      verb, item, remote_name());
            }
        }
        len -= got;
        done += got;
        buf += got;
    }
    return done;
}


/*
 * Current time of day in seconds, with the microseconds as the fractional
 * part.
 */
static double
get_seconds(void)
{
    struct timeval now;

    if (gettimeofday(&now, 0) < 0)
        error(SYS, "gettimeofday failed");
    return now.tv_sec + now.tv_usec/(1000.0*1000.0);
}


/*
 * getaddrinfo wrapper taking the port as a number.  Any failure, including an
 * empty result, is reported through error().
 */
struct addrinfo *
getaddrinfo_port(char *node, int port, struct addrinfo *hints)
{
    struct addrinfo *result;
    char *portstr = qasprintf("%d", port);
    int rc = getaddrinfo(node, portstr, hints, &result);

    free(portstr);
    if (rc != 0)
        error(0, "getaddrinfo failed: %s", gai_strerror(rc));
    if (!result)
        error(0, "getaddrinfo failed: no valid entries");
    return result;
}


/*
 * A version of setsockopt that sets a parameter to 1 and exits with an error
 * on failure.
*/
void
setsockopt_one(int fd, int optname)
{
    int one = 1;

    if (setsockopt(fd, SOL_SOCKET, optname, &one, sizeof(one)) >= 0)
        return;
    error(SYS, "setsockopt %d %d to 1 failed", SOL_SOCKET, optname);
}


/*
 * This is called when a SIGURG signal is received indicating that TCP
 * out-of-band data has arrived.  This is used by the remote end to indicate
 * one of two conditions: the test has completed or an error has occurred.
 *
 * A '.' out-of-band byte means completion.  Any other byte means an error:
 * the message that follows the urgent mark is read, prefixed with the name of
 * the remote side, written to file descriptor 2 and the program exits.
 */
void
urgent(void)
{
    char *p, *q;
    char buffer[256];

    /*
     * There is a slim chance that an urgent message arrived before accept
     * returned.  This is likely not even possible with the current code flow
     * but we check just in case.
     */
    if (RemoteFD < 0)
        return;

    /*
     * This recv could fail if for some reason our socket buffer was full of
     * in-band data and the remote side could not send the out of band data.
     * If the recv fails with EWOULDBLOCK, we should keep reading in-band data
     * until we clear the in-band data.  Since we do not send enough data for
     * this case to cause us concern in the normal case, we do not expect this
     * to ever occur.  If it does, we let the lower levels deal with it.
     */
    if (recv(RemoteFD, buffer, 1, MSG_OOB) != 1)
        return;

    /*
     * If the indication is that the other side has completed its testing,
     * indicate completion on our side also.
     */
    if (buffer[0] == '.') {
        set_finished();
        return;
    }

    /*
     * If we are the server, we only print out client error messages if we are
     * in debug mode.
     */
    if (!Debug && !is_client())
        die();

    /* Start the message with the name of the remote side. */
    p = buffer;
    q = p + sizeof(buffer);
    buf_app(&p, q, remote_name());
    buf_app(&p, q, ": ");

    /* Give up with a generic failure message if the remote stalls. */
    timeout_set(ERROR_TIMEOUT, sig_alrm_remote_failure);

    /*
     * Read up to the urgent mark.  p is not advanced, so whatever is read
     * here is overwritten by the next read and thereby discarded.
     */
    for (;;) {
        int s = sockatmark(RemoteFD);

        if (s < 0)
            remote_failure_error();
        if (s)
            break;
        (void) read(RemoteFD, p, q-p);
    }

    /* Collect the error text until end of file, error or a full buffer. */
    while (p < q) {
        int n = read(RemoteFD, p, q-p);

        if (n <= 0)
            break;
        p += n;
    }
    timeout_end();

    /* buf_end leaves p on the final newline, hence the +1. */
    buf_end(&p, q);
    (void) write(2, buffer, p+1-buffer);
    die();
}


/*
 * Remote end timed out in an attempt to find the error.
*/ static void sig_alrm_remote_failure(int signo, siginfo_t *siginfo, void *ucontext) { remote_failure_error(); } /* * The remote timed out while attempting to convey an error. Tell the user. */ static void remote_failure_error(void) { char buffer[256]; char *p = buffer; char *q = p + sizeof(buffer); buf_app(&p, q, remote_name()); buf_app(&p, q, " failure"); buf_end(&p, q); (void) write(2, buffer, p+1-buffer); die(); } /* * Return a string describing whether the remote is a client or a server. */ static char * remote_name(void) { if (is_client()) return "server"; else return "client"; } /* * Print out an error message. actions contain a set of flags that determine * what needs to get done. If BUG is set, it is an internal error. If SYS is * set, a system error is printed. If RET is set, we return rather than exit. */ int error(int actions, char *fmt, ...) { va_list alist; char buffer[256]; char *p = buffer; char *q = p + sizeof(buffer); if ((actions & BUG) != 0) buf_app(&p, q, "internal error: "); va_start(alist, fmt); p += vsnprintf(p, q-p, fmt, alist); va_end(alist); if ((actions & SYS) != 0 && errno) { buf_app(&p, q, ": "); buf_app(&p, q, strerror(errno)); } buf_end(&p, q); fwrite(buffer, 1, p+1-buffer, stdout); if ((actions & RET) != 0) return 0; if (RemoteFD >= 0) { send(RemoteFD, "?", 1, MSG_OOB); (void) write(RemoteFD, buffer, p-buffer); shutdown(RemoteFD, SHUT_WR); timeout_set(ERROR_TIMEOUT, sig_alrm_die); while (read(RemoteFD, buffer, sizeof(buffer)) > 0) ; } die(); return 0; } /* * Remote end timed out while waiting for acknowledgement that it received * error. */ static void sig_alrm_die(int signo, siginfo_t *siginfo, void *ucontext) { die(); } /* * Start timeout. 
*/
static void
timeout_set(int seconds, SIGFUNC sigfunc)
{
    struct itimerval disarmed = {{0}};
    struct itimerval armed = {{0}};
    struct sigaction handler ={
        .sa_sigaction = sigfunc,
        .sa_flags     = SA_SIGINFO
    };

    /* Cancel any running timer before the handler is swapped. */
    setitimer(ITIMER_REAL, &disarmed, 0);
    sigaction(SIGALRM, &handler, 0);

    armed.it_value.tv_sec = seconds;
    setitimer(ITIMER_REAL, &armed, 0);
}


/*
 * Cancel a timeout started by timeout_set.
 */
static void
timeout_end(void)
{
    struct itimerval disarmed = {{0}};

    setitimer(ITIMER_REAL, &disarmed, 0);
}


/*
 * Append as much of str as fits between *pp and end, and move *pp past what
 * was copied.  No terminator is written.
 */
static void
buf_app(char **pp, char *end, char *str)
{
    char *dst = *pp;
    int room = end - dst;
    int len = strlen(str);

    if (len > room)
        len = room;
    memcpy(dst, str, len);
    *pp = dst + len;
}


/*
 * Finish a buffer with a newline and leave *pp pointing at that newline.  A
 * completely full buffer has its tail replaced by " ..." to show truncation.
 */
static void
buf_end(char **pp, char *end)
{
    char *tail = *pp;

    if (tail == end) {
        char *marker = " ...";
        int len = strlen(marker);

        /* Step back one byte for the newline, marker goes just before it. */
        tail--;
        memcpy(tail - len, marker, len);
    }
    *tail = '\n';
    *pp = tail;
}


/*
 * Write a formatted line to stderr, but only when Debug is set.
 */
void
debug(char *fmt, ...)
{
    if (Debug) {
        va_list args;

        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
        fprintf(stderr, "\n");
        fflush(stderr);
    }
}


/*
 * Terminate the program with a failure status.
 */
void
die(void)
{
    exit(1);
}